refactor: Introduce pre-consume quota and unify relay handlers

This commit introduces a major architectural refactoring to improve quota management, centralize logging, and streamline the relay handling logic.

Key changes:
- **Pre-consume Quota:** Implements a new mechanism to check and reserve user quota *before* making the request to the upstream provider. This ensures more accurate quota deduction and prevents users from exceeding their limits due to concurrent requests.

- **Unified Relay Handlers:** Refactors the relay logic to use generic handlers (e.g., `ChatHandler`, `ImageHandler`) instead of provider-specific implementations. This significantly reduces code duplication and simplifies adding new channels.

- **Centralized Logger:** A new dedicated `logger` package is introduced, and all system logging calls are migrated to use it, moving this responsibility out of the `common` package.

- **Code Reorganization:** DTOs are generalized (e.g., `dalle.go` -> `openai_image.go`) and utility code is moved to more appropriate packages (e.g., `common/http.go` -> `service/http.go`) for better code structure.
This commit is contained in:
CaIon
2025-08-14 20:05:06 +08:00
parent 17bab355e4
commit e2037ad756
113 changed files with 3095 additions and 2518 deletions

View File

@@ -39,12 +39,13 @@ const (
ErrorCodeSensitiveWordsDetected ErrorCode = "sensitive_words_detected"
// new api error
ErrorCodeCountTokenFailed ErrorCode = "count_token_failed"
ErrorCodeModelPriceError ErrorCode = "model_price_error"
ErrorCodeInvalidApiType ErrorCode = "invalid_api_type"
ErrorCodeJsonMarshalFailed ErrorCode = "json_marshal_failed"
ErrorCodeDoRequestFailed ErrorCode = "do_request_failed"
ErrorCodeGetChannelFailed ErrorCode = "get_channel_failed"
ErrorCodeCountTokenFailed ErrorCode = "count_token_failed"
ErrorCodeModelPriceError ErrorCode = "model_price_error"
ErrorCodeInvalidApiType ErrorCode = "invalid_api_type"
ErrorCodeJsonMarshalFailed ErrorCode = "json_marshal_failed"
ErrorCodeDoRequestFailed ErrorCode = "do_request_failed"
ErrorCodeGetChannelFailed ErrorCode = "get_channel_failed"
ErrorCodeGenRelayInfoFailed ErrorCode = "gen_relay_info_failed"
// channel error
ErrorCodeChannelNoAvailableKey ErrorCode = "channel:no_available_key"

31
types/price_data.go Normal file
View File

@@ -0,0 +1,31 @@
package types
import "fmt"
// GroupRatioInfo bundles the ratio values attached to a group.
type GroupRatioInfo struct {
	// GroupRatio is the ratio value reported by PriceData.ToSetting.
	GroupRatio float64
	// GroupSpecialRatio is a second ratio value carried alongside GroupRatio.
	GroupSpecialRatio float64
	// HasSpecialRatio presumably signals that GroupSpecialRatio is set;
	// NOTE(review): confirm against the code that populates this struct.
	HasSpecialRatio bool
}

// PriceData collects the pricing parameters of a request: a model price,
// several ratio multipliers, a flag, a quota amount, and the group ratio
// information.
type PriceData struct {
	ModelPrice         float64
	ModelRatio         float64
	CompletionRatio    float64
	CacheRatio         float64
	CacheCreationRatio float64
	ImageRatio         float64
	// UsePrice presumably selects ModelPrice over the ratio-based fields;
	// NOTE(review): confirm against the billing code.
	UsePrice               bool
	ShouldPreConsumedQuota int
	GroupRatioInfo         GroupRatioInfo
}

// PerCallPriceData is the reduced pricing record holding a model price, a
// quota amount, and the group ratio information.
type PerCallPriceData struct {
	ModelPrice     float64
	Quota          int
	GroupRatioInfo GroupRatioInfo
}

// ToSetting renders the pricing fields as a single comma-separated
// "Name: value" line. Floats are printed with %f (six decimals). Of the
// group ratio information only GroupRatio is included.
func (p PriceData) ToSetting() string {
	const layout = "ModelPrice: %f, ModelRatio: %f, CompletionRatio: %f, CacheRatio: %f, GroupRatio: %f, UsePrice: %t, CacheCreationRatio: %f, ShouldPreConsumedQuota: %d, ImageRatio: %f"

	// Argument order must follow the verbs in layout, not the field order
	// of the struct.
	return fmt.Sprintf(
		layout,
		p.ModelPrice,
		p.ModelRatio,
		p.CompletionRatio,
		p.CacheRatio,
		p.GroupRatioInfo.GroupRatio,
		p.UsePrice,
		p.CacheCreationRatio,
		p.ShouldPreConsumedQuota,
		p.ImageRatio,
	)
}

15
types/relay_format.go Normal file
View File

@@ -0,0 +1,15 @@
package types
// RelayFormat is a string identifier for the format of a relayed request.
type RelayFormat string

// Relay format identifiers.
//
// Each constant states the RelayFormat type explicitly. In a const block a
// line written as `Name = "value"` does not inherit the type of the line
// above it (implicit repetition only applies when the expression is omitted
// as well), so leaving the type off yields an untyped string constant rather
// than a RelayFormat.
const (
	RelayFormatOpenAI          RelayFormat = "openai"
	RelayFormatClaude          RelayFormat = "claude"
	RelayFormatGemini          RelayFormat = "gemini"
	RelayFormatOpenAIResponses RelayFormat = "openai_responses"
	RelayFormatOpenAIAudio     RelayFormat = "openai_audio"
	RelayFormatOpenAIImage     RelayFormat = "openai_image"
	RelayFormatOpenAIRealtime  RelayFormat = "openai_realtime"
	RelayFormatRerank          RelayFormat = "rerank"
	RelayFormatEmbedding       RelayFormat = "embedding"
)

27
types/relay_request.go Normal file
View File

@@ -0,0 +1,27 @@
package types
// RelayRequest pairs an original request value with the relay format it is
// tagged with and a prompt token count.
type RelayRequest struct {
// OriginRequest holds the request as an untyped value; CopyOriginRequest
// type-switches on it to find the concrete request type.
OriginRequest any
// Format is the RelayFormat associated with this request.
Format RelayFormat
// PromptTokenCount is the prompt token count stored with this request.
PromptTokenCount int
}
// CopyOriginRequest returns the result of calling Copy on the stored
// OriginRequest when it is one of the supported request pointer types.
// It returns nil when OriginRequest is nil or holds any other type.
func (r *RelayRequest) CopyOriginRequest() any {
	switch origin := r.OriginRequest.(type) {
	case nil:
		// Nothing stored, so there is nothing to copy.
		return nil
	case *GeneralOpenAIRequest:
		return origin.Copy()
	case *GeneralClaudeRequest:
		return origin.Copy()
	case *GeneralGeminiRequest:
		return origin.Copy()
	case *GeneralRerankRequest:
		return origin.Copy()
	case *GeneralEmbeddingRequest:
		return origin.Copy()
	}
	// Unsupported request type.
	return nil
}

45
types/request_meta.go Normal file
View File

@@ -0,0 +1,45 @@
package types
// FileType is a string label for the kind of a file.
type FileType string
// File kinds defined by this package.
const (
FileTypeImage FileType = "image" // Image file type
FileTypeAudio FileType = "audio" // Audio file type
FileTypeVideo FileType = "video" // Video file type
FileTypeFile FileType = "file" // Generic file type
)
// TokenType is a string label for a category of tokens.
type TokenType string
// Token categories defined by this package.
const (
TokenTypeTextNumber TokenType = "text_number" // Text or number tokens
TokenTypeTokenizer TokenType = "tokenizer" // Tokenizer tokens
TokenTypeImage TokenType = "image" // Image tokens
)
// TokenCountMeta gathers request properties related to token counting.
// Every field is tagged omitempty, so zero-valued fields are left out of
// the JSON encoding.
// NOTE(review): how each field feeds into the count is decided by the
// consumer of this struct, which is not visible here.
type TokenCountMeta struct {
TokenType TokenType `json:"token_type,omitempty"` // Type of tokens used in the request
CombineText string `json:"combine_text,omitempty"` // Combined text from all messages
ToolsCount int `json:"tools_count,omitempty"` // Number of tools used
NameCount int `json:"name_count,omitempty"` // Number of names in the request
MessagesCount int `json:"messages_count,omitempty"` // Number of messages in the request
Files []*FileMeta `json:"files,omitempty"` // List of files, each with type and content
MaxTokens int `json:"max_tokens,omitempty"` // Maximum tokens allowed in the request
// The JSON key is "image_ratio", which differs from the Go field name.
ImagePriceRatio float64 `json:"image_ratio,omitempty"` // Ratio for image size, if applicable
//IsStreaming bool `json:"is_streaming,omitempty"` // Indicates if the request is streaming
}
// FileMeta describes a single file entry.
// FileType is embedded, so the file kind is available as the promoted
// field FileMeta.FileType.
type FileMeta struct {
FileType
// MimeType is the MIME type string recorded for the file.
MimeType string
// Data is the file payload as a string.
// NOTE(review): presumably a URL or encoded content — confirm with the producer.
Data string
// Detail is an additional string attribute of the file.
// NOTE(review): possibly an image detail level — confirm with the consumer.
Detail string
}
// RequestMeta holds per-request metadata: the original model name, the
// group in use, the prompt token count, and the pre-consumed quota.
// No field uses omitempty, so every key is always present in the JSON
// encoding.
type RequestMeta struct {
OriginalModelName string `json:"original_model_name"`
UserUsingGroup string `json:"user_using_group"`
PromptTokens int `json:"prompt_tokens"`
PreConsumedQuota int `json:"pre_consumed_quota"`
}