refactor: Introduce pre-consume quota and unify relay handlers

This commit introduces a major architectural refactoring to improve quota management, centralize logging, and streamline the relay handling logic.

Key changes:

- **Pre-consume Quota:** Implements a new mechanism to check and reserve user quota *before* making the request to the upstream provider. This ensures more accurate quota deduction and prevents users from exceeding their limits due to concurrent requests.
- **Unified Relay Handlers:** Refactors the relay logic to use generic handlers (e.g., `ChatHandler`, `ImageHandler`) instead of provider-specific implementations. This significantly reduces code duplication and simplifies adding new channels.
- **Centralized Logger:** A new dedicated `logger` package is introduced, and all system logging calls are migrated to use it, moving this responsibility out of the `common` package.
- **Code Reorganization:** DTOs are generalized (e.g., `dalle.go` -> `openai_image.go`) and utility code is moved to more appropriate packages (e.g., `common/http.go` -> `service/http.go`) for better code structure.
2025-08-14 20:05:06 +08:00
parent 17bab355e4
commit e2037ad756
113 changed files with 3095 additions and 2518 deletions
--- a/dto/openai_request.go
+++ b/dto/openai_request.go
@@ -2,8 +2,12 @@ package dto

 import (
 	"encoding/json"
+	"fmt"
 	"one-api/common"
+	"one-api/types"
 	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 type ResponseFormat struct {
@@ -67,6 +71,116 @@ type GeneralOpenAIRequest struct {
 	Extra map[string]json.RawMessage `json:"-"`
 }

+func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta { // GetTokenCountMeta gathers the request's text, attached files and max-token value into a types.TokenCountMeta.
+	var tokenCountMeta types.TokenCountMeta
+	var texts = make([]string, 0)
+	var fileMeta = make([]*types.FileMeta, 0) // one entry per image/audio/file/video content part
+
+	if r.Prompt != nil {
+		switch v := r.Prompt.(type) {
+		case string:
+			texts = append(texts, v)
+		case []any:
+			for _, item := range v {
+				if str, ok := item.(string); ok { // non-string elements are ignored
+					texts = append(texts, str)
+				}
+			}
+		default:
+			texts = append(texts, fmt.Sprintf("%v", r.Prompt)) // any other prompt shape is included via its %v rendering
+		}
+	}
+
+	if r.Input != nil { // the strings returned by ParseInput are added unchanged
+		inputs := r.ParseInput()
+		texts = append(texts, inputs...)
+	}
+
+	if r.MaxCompletionTokens > r.MaxTokens { // MaxTokens takes the larger of the two limits
+		tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
+	} else {
+		tokenCountMeta.MaxTokens = int(r.MaxTokens)
+	}
+
+	for _, message := range r.Messages {
+		tokenCountMeta.MessagesCount++
+		texts = append(texts, message.Role) // the role string is included in the combined text
+		if message.Content != nil {
+			if message.Name != nil { // Name is only considered for messages that have content
+				tokenCountMeta.NameCount++
+				texts = append(texts, *message.Name)
+			}
+			arrayContent := message.ParseContent()
+			for _, m := range arrayContent { // each part becomes either a file entry or a text fragment
+				if m.Type == ContentTypeImageURL {
+					imageUrl := m.GetImageMedia()
+					if imageUrl != nil {
+						meta := &types.FileMeta{
+							FileType: types.FileTypeImage,
+						}
+						meta.Data = imageUrl.Url
+						meta.Detail = imageUrl.Detail
+						fileMeta = append(fileMeta, meta)
+					}
+				} else if m.Type == ContentTypeInputAudio {
+					inputAudio := m.GetInputAudio()
+					if inputAudio != nil {
+						meta := &types.FileMeta{
+							FileType: types.FileTypeAudio,
+						}
+						meta.Data = inputAudio.Data
+						fileMeta = append(fileMeta, meta)
+					}
+				} else if m.Type == ContentTypeFile {
+					file := m.GetFile()
+					if file != nil {
+						meta := &types.FileMeta{
+							FileType: types.FileTypeFile,
+						}
+						meta.Data = file.FileData
+						fileMeta = append(fileMeta, meta)
+					}
+				} else if m.Type == ContentTypeVideoUrl {
+					videoUrl := m.GetVideoUrl()
+					if videoUrl != nil {
+						meta := &types.FileMeta{
+							FileType: types.FileTypeVideo,
+						}
+						meta.Data = videoUrl.Url
+						fileMeta = append(fileMeta, meta)
+					}
+				} else { // every other part type contributes its Text field
+					texts = append(texts, m.Text)
+				}
+			}
+		}
+	}
+
+	if r.Tools != nil {
+		openaiTools := r.Tools
+		for _, tool := range openaiTools {
+			tokenCountMeta.ToolsCount++
+			texts = append(texts, tool.Function.Name)
+			if tool.Function.Description != "" {
+				texts = append(texts, tool.Function.Description)
+			}
+			if tool.Function.Parameters != nil {
+				texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters)) // parameters are included via their %v rendering
+			}
+		}
+		//toolTokens := CountTokenInput(countStr, request.Model)
+		//tkm += 8
+		//tkm += toolTokens
+	}
+	tokenCountMeta.CombineText = strings.Join(texts, "\n") // all collected fragments, newline-separated
+	tokenCountMeta.Files = fileMeta
+	return &tokenCountMeta
+}
+
+func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool { // IsStream reports the request's Stream flag; c is not used.
+	return r.Stream
+}
+
 func (r *GeneralOpenAIRequest) ToMap() map[string]any {
 	result := make(map[string]any)
 	data, _ := common.Marshal(r)
@@ -202,10 +316,25 @@ func (m *MediaContent) GetFile() *MessageFile {
 	return nil
 }

+func (m *MediaContent) GetVideoUrl() *MessageVideoUrl { // GetVideoUrl returns VideoUrl as a *MessageVideoUrl, or nil when it is unset or has another shape.
+	if m.VideoUrl != nil {
+		if _, ok := m.VideoUrl.(*MessageVideoUrl); ok { // already the typed value
+			return m.VideoUrl.(*MessageVideoUrl)
+		}
+		if itemMap, ok := m.VideoUrl.(map[string]any); ok { // generic map: only the "url" key is read
+			out := &MessageVideoUrl{
+				Url: common.Interface2String(itemMap["url"]),
+			}
+			return out
+		}
+	}
+	return nil
+}
+
 type MessageImageUrl struct {
-	Url      string `json:"url"`
-	Detail   string `json:"detail"`
-	MimeType string
+	Url    string `json:"url"`
+	Detail string `json:"detail"` // copied into FileMeta.Detail by GeneralOpenAIRequest.GetTokenCountMeta
+	//MimeType string (field no longer part of the struct; left commented out)
 }

 func (m *MessageImageUrl) IsRemoteImage() bool {
@@ -233,6 +362,7 @@ const (
 	ContentTypeInputAudio = "input_audio"
 	ContentTypeFile       = "file"
 	ContentTypeVideoUrl   = "video_url" // 阿里百炼视频识别
+	//ContentTypeAudioUrl   = "audio_url"
 )

 func (m *Message) GetPrefix() bool {
@@ -623,7 +753,7 @@ type WebSearchOptions struct {
 // https://platform.openai.com/docs/api-reference/responses/create
 type OpenAIResponsesRequest struct {
 	Model              string           `json:"model"`
-	Input              json.RawMessage  `json:"input,omitempty"`
+	Input              any              `json:"input,omitempty"`
 	Include            json.RawMessage  `json:"include,omitempty"`
 	Instructions       json.RawMessage  `json:"instructions,omitempty"`
 	MaxOutputTokens    uint             `json:"max_output_tokens,omitempty"`
@@ -645,28 +775,145 @@ type OpenAIResponsesRequest struct {
 	Prompt             json.RawMessage  `json:"prompt,omitempty"`
 }

+func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta { // GetTokenCountMeta gathers input text, attached files and MaxOutputTokens into a types.TokenCountMeta.
+	var fileMeta = make([]*types.FileMeta, 0)
+	var texts = make([]string, 0)
+
+	if r.Input != nil {
+		inputs := r.ParseInput()
+		for _, input := range inputs { // images and files become file entries, everything else is text
+			if input.Type == "input_image" {
+				fileMeta = append(fileMeta, &types.FileMeta{
+					FileType: types.FileTypeImage,
+					Data:     input.ImageUrl,
+					Detail:   input.Detail,
+				})
+			} else if input.Type == "input_file" {
+				fileMeta = append(fileMeta, &types.FileMeta{
+					FileType: types.FileTypeFile,
+					Data:     input.FileUrl,
+				})
+			} else { // any other item type contributes its Text field
+				texts = append(texts, input.Text)
+			}
+		}
+	}
+
+	if len(r.Instructions) > 0 { // this and the following optional fields are added as their raw string form
+		texts = append(texts, string(r.Instructions))
+	}
+
+	if len(r.Metadata) > 0 {
+		texts = append(texts, string(r.Metadata))
+	}
+
+	if len(r.Text) > 0 {
+		texts = append(texts, string(r.Text))
+	}
+
+	if len(r.ToolChoice) > 0 {
+		texts = append(texts, string(r.ToolChoice))
+	}
+
+	if len(r.Prompt) > 0 {
+		texts = append(texts, string(r.Prompt))
+	}
+
+	if len(r.Tools) > 0 {
+		toolStr, _ := common.Marshal(r.Tools) // the Marshal error is discarded
+		texts = append(texts, string(toolStr))
+	}
+
+	return &types.TokenCountMeta{
+		CombineText: strings.Join(texts, "\n"), // all collected fragments, newline-separated
+		Files:       fileMeta,
+		MaxTokens:   int(r.MaxOutputTokens),
+	}
+}
+
+func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool { // IsStream reports the request's Stream flag; c is not used.
+	return r.Stream
+}
+
 type Reasoning struct {
 	Effort  string `json:"effort,omitempty"`
 	Summary string `json:"summary,omitempty"`
 }

-//type ResponsesToolsCall struct {
-//	Type string `json:"type"`
-//	// Web Search
-//	UserLocation      json.RawMessage `json:"user_location,omitempty"`
-//	SearchContextSize string          `json:"search_context_size,omitempty"`
-//	// File Search
-//	VectorStoreIds []string        `json:"vector_store_ids,omitempty"`
-//	MaxNumResults  uint            `json:"max_num_results,omitempty"`
-//	Filters        json.RawMessage `json:"filters,omitempty"`
-//	// Computer Use
-//	DisplayWidth  uint   `json:"display_width,omitempty"`
-//	DisplayHeight uint   `json:"display_height,omitempty"`
-//	Environment   string `json:"environment,omitempty"`
-//	// Function
-//	Name        string          `json:"name,omitempty"`
-//	Description string          `json:"description,omitempty"`
-//	Parameters  json.RawMessage `json:"parameters,omitempty"`
-//	Function    json.RawMessage `json:"function,omitempty"`
-//	Container   json.RawMessage `json:"container,omitempty"`
-//}
+type MediaInput struct { // MediaInput is one normalized input item as returned by OpenAIResponsesRequest.ParseInput.
+	Type     string `json:"type"`
+	Text     string `json:"text,omitempty"`
+	FileUrl  string `json:"file_url,omitempty"`
+	ImageUrl string `json:"image_url,omitempty"`
+	Detail   string `json:"detail,omitempty"` // only meaningful for input_image
+}
+
+// ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
+// Reference implementation mirrors Message.ParseContent:
+//   - input can be a string, treated as an input_text item
+//   - input can be an array of objects with a `type` field
+//     supported types: input_text, input_image, input_file
+//   - array items that are neither MediaInput values nor objects of a supported type are skipped
+//   - for input_image, the optional string `detail` field is carried into MediaInput.Detail
+func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
+	if r.Input == nil {
+		return nil
+	}
+
+	var inputs []MediaInput
+
+	// Try string first
+	if str, ok := r.Input.(string); ok {
+		inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
+		return inputs
+	}
+
+	// Try array of parts
+	if array, ok := r.Input.([]any); ok {
+		for _, itemAny := range array {
+			// Already parsed MediaInput
+			if media, ok := itemAny.(MediaInput); ok {
+				inputs = append(inputs, media)
+				continue
+			}
+			// Generic map
+			item, ok := itemAny.(map[string]any)
+			if !ok {
+				continue
+			}
+			typeVal, ok := item["type"].(string)
+			if !ok {
+				continue
+			}
+			switch typeVal {
+			case "input_text":
+				text, _ := item["text"].(string)
+				inputs = append(inputs, MediaInput{Type: "input_text", Text: text})
+			case "input_image":
+				// image_url may be string or object with url field
+				var imageUrl string
+				switch v := item["image_url"].(type) {
+				case string:
+					imageUrl = v
+				case map[string]any:
+					imageUrl, _ = v["url"].(string)
+				}
+				// detail is optional; a missing or non-string value leaves it empty
+				detail, _ := item["detail"].(string)
+				inputs = append(inputs, MediaInput{Type: "input_image", ImageUrl: imageUrl, Detail: detail})
+			case "input_file":
+				// file_url may be string or object with url field
+				var fileUrl string
+				switch v := item["file_url"].(type) {
+				case string:
+					fileUrl = v
+				case map[string]any:
+					fileUrl, _ = v["url"].(string)
+				}
+				inputs = append(inputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
+			}
+		}
+	}
+
+	return inputs
+}