first commit: one-api base code + SAAS plan document
This commit is contained in:
52
relay/billing/billing.go
Normal file
52
relay/billing/billing.go
Normal file
@@ -0,0 +1,52 @@
|
||||
package billing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/songquanpeng/one-api/common/logger"
|
||||
"github.com/songquanpeng/one-api/model"
|
||||
)
|
||||
|
||||
func ReturnPreConsumedQuota(ctx context.Context, preConsumedQuota int64, tokenId int) {
|
||||
if preConsumedQuota != 0 {
|
||||
go func(ctx context.Context) {
|
||||
// return pre-consumed quota
|
||||
err := model.PostConsumeTokenQuota(tokenId, -preConsumedQuota)
|
||||
if err != nil {
|
||||
logger.Error(ctx, "error return pre-consumed quota: "+err.Error())
|
||||
}
|
||||
}(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
func PostConsumeQuota(ctx context.Context, tokenId int, quotaDelta int64, totalQuota int64, userId int, channelId int, modelRatio float64, groupRatio float64, modelName string, tokenName string) {
|
||||
// quotaDelta is remaining quota to be consumed
|
||||
err := model.PostConsumeTokenQuota(tokenId, quotaDelta)
|
||||
if err != nil {
|
||||
logger.SysError("error consuming token remain quota: " + err.Error())
|
||||
}
|
||||
err = model.CacheUpdateUserQuota(ctx, userId)
|
||||
if err != nil {
|
||||
logger.SysError("error update user quota cache: " + err.Error())
|
||||
}
|
||||
// totalQuota is total quota consumed
|
||||
if totalQuota != 0 {
|
||||
logContent := fmt.Sprintf("倍率:%.2f × %.2f", modelRatio, groupRatio)
|
||||
model.RecordConsumeLog(ctx, &model.Log{
|
||||
UserId: userId,
|
||||
ChannelId: channelId,
|
||||
PromptTokens: int(totalQuota),
|
||||
CompletionTokens: 0,
|
||||
ModelName: modelName,
|
||||
TokenName: tokenName,
|
||||
Quota: int(totalQuota),
|
||||
Content: logContent,
|
||||
})
|
||||
model.UpdateUserUsedQuotaAndRequestCount(userId, totalQuota)
|
||||
model.UpdateChannelUsedQuota(channelId, totalQuota)
|
||||
}
|
||||
if totalQuota <= 0 {
|
||||
logger.Error(ctx, fmt.Sprintf("totalQuota consumed is %d, something is wrong", totalQuota))
|
||||
}
|
||||
}
|
||||
40
relay/billing/ratio/group.go
Normal file
40
relay/billing/ratio/group.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package ratio
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"github.com/songquanpeng/one-api/common/logger"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// groupRatioLock guards GroupRatio against concurrent read/write
// (UpdateGroupRatioByJSONString replaces the map under the write lock).
var groupRatioLock sync.RWMutex

// GroupRatio maps a user group name to its billing multiplier.
// All built-in groups start at 1 (no surcharge or discount); the table can
// be replaced at runtime via UpdateGroupRatioByJSONString.
var GroupRatio = map[string]float64{
	"default": 1,
	"vip":     1,
	"svip":    1,
}
|
||||
|
||||
func GroupRatio2JSONString() string {
|
||||
jsonBytes, err := json.Marshal(GroupRatio)
|
||||
if err != nil {
|
||||
logger.SysError("error marshalling model ratio: " + err.Error())
|
||||
}
|
||||
return string(jsonBytes)
|
||||
}
|
||||
|
||||
func UpdateGroupRatioByJSONString(jsonStr string) error {
|
||||
groupRatioLock.Lock()
|
||||
defer groupRatioLock.Unlock()
|
||||
GroupRatio = make(map[string]float64)
|
||||
return json.Unmarshal([]byte(jsonStr), &GroupRatio)
|
||||
}
|
||||
|
||||
func GetGroupRatio(name string) float64 {
|
||||
groupRatioLock.RLock()
|
||||
defer groupRatioLock.RUnlock()
|
||||
ratio, ok := GroupRatio[name]
|
||||
if !ok {
|
||||
logger.SysError("group ratio not found: " + name)
|
||||
return 1
|
||||
}
|
||||
return ratio
|
||||
}
|
||||
66
relay/billing/ratio/image.go
Normal file
66
relay/billing/ratio/image.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package ratio
|
||||
|
||||
// ImageSizeRatios maps an image model name to a per-resolution price
// multiplier applied on top of the model's base ratio (e.g. a dall-e-2
// 1024x1024 image costs 1.25x the 256x256 baseline). Models absent from
// this table — or sizes absent from a model's inner map — are presumably
// billed at the base ratio; confirm against the billing call site.
var ImageSizeRatios = map[string]map[string]float64{
	"dall-e-2": {
		"256x256":   1,
		"512x512":   1.125,
		"1024x1024": 1.25,
	},
	"dall-e-3": {
		"1024x1024": 1,
		"1024x1792": 2,
		"1792x1024": 2,
	},
	"ali-stable-diffusion-xl": {
		"512x1024":  1,
		"1024x768":  1,
		"1024x1024": 1,
		"576x1024":  1,
		"1024x576":  1,
	},
	"ali-stable-diffusion-v1.5": {
		"512x1024":  1,
		"1024x768":  1,
		"1024x1024": 1,
		"576x1024":  1,
		"1024x576":  1,
	},
	"wanx-v1": {
		"1024x1024": 1,
		"720x1280":  1,
		"1280x720":  1,
	},
	"step-1x-medium": {
		"256x256":   1,
		"512x512":   1,
		"768x768":   1,
		"1024x1024": 1,
		"1280x800":  1,
		"800x1280":  1,
	},
}
|
||||
|
||||
// ImageGenerationAmounts maps an image model name to the accepted range of
// images per request. The dall-e-3 entry's comment ("OpenAI allows n=1")
// confirms index [1] is the maximum; index [0] is presumably the minimum —
// TODO confirm against the request-validation caller.
var ImageGenerationAmounts = map[string][2]int{
	"dall-e-2":                  {1, 10},
	"dall-e-3":                  {1, 1}, // OpenAI allows n=1 currently.
	"ali-stable-diffusion-xl":   {1, 4}, // Ali
	"ali-stable-diffusion-v1.5": {1, 4}, // Ali
	"wanx-v1":                   {1, 4}, // Ali
	"cogview-3":                 {1, 1},
	"step-1x-medium":            {1, 1},
}
|
||||
|
||||
// ImagePromptLengthLimitations maps an image model name to its maximum
// allowed prompt length — presumably measured in characters (matching the
// vendors' documented limits); verify the unit against the validating caller.
var ImagePromptLengthLimitations = map[string]int{
	"dall-e-2":                  1000,
	"dall-e-3":                  4000,
	"ali-stable-diffusion-xl":   4000,
	"ali-stable-diffusion-v1.5": 4000,
	"wanx-v1":                   4000,
	"cogview-3":                 833,
	"step-1x-medium":            4000,
}
|
||||
|
||||
// ImageOriginModelName maps a one-api model alias to the upstream vendor's
// original model name (used where the provider expects its own naming
// rather than the prefixed alias).
var ImageOriginModelName = map[string]string{
	"ali-stable-diffusion-xl":   "stable-diffusion-xl",
	"ali-stable-diffusion-v1.5": "stable-diffusion-v1.5",
}
|
||||
835
relay/billing/ratio/model.go
Normal file
835
relay/billing/ratio/model.go
Normal file
@@ -0,0 +1,835 @@
|
||||
package ratio
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/songquanpeng/one-api/common/logger"
|
||||
)
|
||||
|
||||
// Conversion factors between real-world currency prices and internal quota
// units, where 1 quota unit == $0.002 per 1K tokens.
const (
	// USD2RMB is the fixed USD→CNY exchange rate baked into the pricing table.
	USD2RMB = 7
	// USD is the quota value of $1: since $0.002 == 1 unit, $1 == 500 units.
	USD = 500 // $0.002 = 1 -> $1 = 500
	// MILLI_USD is the quota value of $0.001 (handy for per-1M-token prices).
	MILLI_USD = 1.0 / 1000 * USD
	// RMB is the quota value of ¥1, derived from USD via the fixed rate.
	RMB = USD / USD2RMB
)
|
||||
|
||||
// modelRatioLock guards ModelRatio against concurrent read/write
// (UpdateModelRatioByJSONString replaces the map under the write lock).
var modelRatioLock sync.RWMutex
|
||||
|
||||
// ModelRatio
|
||||
// https://platform.openai.com/docs/models/model-endpoint-compatibility
|
||||
// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Blfmc9dlf
|
||||
// https://openai.com/pricing
|
||||
// 1 === $0.002 / 1K tokens
|
||||
// 1 === ¥0.014 / 1k tokens
|
||||
var ModelRatio = map[string]float64{
|
||||
// https://openai.com/pricing
|
||||
"gpt-4": 15,
|
||||
"gpt-4-0314": 15,
|
||||
"gpt-4-0613": 15,
|
||||
"gpt-4-32k": 30,
|
||||
"gpt-4-32k-0314": 30,
|
||||
"gpt-4-32k-0613": 30,
|
||||
"gpt-4-1106-preview": 5, // $0.01 / 1K tokens
|
||||
"gpt-4-0125-preview": 5, // $0.01 / 1K tokens
|
||||
"gpt-4-turbo-preview": 5, // $0.01 / 1K tokens
|
||||
"gpt-4-turbo": 5, // $0.01 / 1K tokens
|
||||
"gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens
|
||||
"gpt-4o": 2.5, // $0.005 / 1K tokens
|
||||
"chatgpt-4o-latest": 2.5, // $0.005 / 1K tokens
|
||||
"gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens
|
||||
"gpt-4o-2024-08-06": 1.25, // $0.0025 / 1K tokens
|
||||
"gpt-4o-2024-11-20": 1.25, // $0.0025 / 1K tokens
|
||||
"gpt-4o-mini": 0.075, // $0.00015 / 1K tokens
|
||||
"gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens
|
||||
"gpt-4-vision-preview": 5, // $0.01 / 1K tokens
|
||||
"gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens
|
||||
"gpt-3.5-turbo-0301": 0.75,
|
||||
"gpt-3.5-turbo-0613": 0.75,
|
||||
"gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens
|
||||
"gpt-3.5-turbo-16k-0613": 1.5,
|
||||
"gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens
|
||||
"gpt-3.5-turbo-1106": 0.5, // $0.001 / 1K tokens
|
||||
"gpt-3.5-turbo-0125": 0.25, // $0.0005 / 1K tokens
|
||||
"o1": 7.5, // $15.00 / 1M input tokens
|
||||
"o1-2024-12-17": 7.5,
|
||||
"o1-preview": 7.5, // $15.00 / 1M input tokens
|
||||
"o1-preview-2024-09-12": 7.5,
|
||||
"o1-mini": 1.5, // $3.00 / 1M input tokens
|
||||
"o1-mini-2024-09-12": 1.5,
|
||||
"o3-mini": 1.5, // $3.00 / 1M input tokens
|
||||
"o3-mini-2025-01-31": 1.5,
|
||||
"davinci-002": 1, // $0.002 / 1K tokens
|
||||
"babbage-002": 0.2, // $0.0004 / 1K tokens
|
||||
"text-ada-001": 0.2,
|
||||
"text-babbage-001": 0.25,
|
||||
"text-curie-001": 1,
|
||||
"text-davinci-002": 10,
|
||||
"text-davinci-003": 10,
|
||||
"text-davinci-edit-001": 10,
|
||||
"code-davinci-edit-001": 10,
|
||||
"whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
|
||||
"tts-1": 7.5, // $0.015 / 1K characters
|
||||
"tts-1-1106": 7.5,
|
||||
"tts-1-hd": 15, // $0.030 / 1K characters
|
||||
"tts-1-hd-1106": 15,
|
||||
"davinci": 10,
|
||||
"curie": 10,
|
||||
"babbage": 10,
|
||||
"ada": 10,
|
||||
"text-embedding-ada-002": 0.05,
|
||||
"text-embedding-3-small": 0.01,
|
||||
"text-embedding-3-large": 0.065,
|
||||
"text-search-ada-doc-001": 10,
|
||||
"text-moderation-stable": 0.1,
|
||||
"text-moderation-latest": 0.1,
|
||||
"dall-e-2": 0.02 * USD, // $0.016 - $0.020 / image
|
||||
"dall-e-3": 0.04 * USD, // $0.040 - $0.120 / image
|
||||
// https://docs.anthropic.com/en/docs/about-claude/models
|
||||
"claude-instant-1.2": 0.8 / 1000 * USD,
|
||||
"claude-2.0": 8.0 / 1000 * USD,
|
||||
"claude-2.1": 8.0 / 1000 * USD,
|
||||
"claude-3-haiku-20240307": 0.25 / 1000 * USD,
|
||||
"claude-3-5-haiku-20241022": 1.0 / 1000 * USD,
|
||||
"claude-3-5-haiku-latest": 1.0 / 1000 * USD,
|
||||
"claude-3-sonnet-20240229": 3.0 / 1000 * USD,
|
||||
"claude-3-5-sonnet-20240620": 3.0 / 1000 * USD,
|
||||
"claude-3-5-sonnet-20241022": 3.0 / 1000 * USD,
|
||||
"claude-3-5-sonnet-latest": 3.0 / 1000 * USD,
|
||||
"claude-3-opus-20240229": 15.0 / 1000 * USD,
|
||||
// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
|
||||
"ERNIE-4.0-8K": 0.120 * RMB,
|
||||
"ERNIE-3.5-8K": 0.012 * RMB,
|
||||
"ERNIE-3.5-8K-0205": 0.024 * RMB,
|
||||
"ERNIE-3.5-8K-1222": 0.012 * RMB,
|
||||
"ERNIE-Bot-8K": 0.024 * RMB,
|
||||
"ERNIE-3.5-4K-0205": 0.012 * RMB,
|
||||
"ERNIE-Speed-8K": 0.004 * RMB,
|
||||
"ERNIE-Speed-128K": 0.004 * RMB,
|
||||
"ERNIE-Lite-8K-0922": 0.008 * RMB,
|
||||
"ERNIE-Lite-8K-0308": 0.003 * RMB,
|
||||
"ERNIE-Tiny-8K": 0.001 * RMB,
|
||||
"BLOOMZ-7B": 0.004 * RMB,
|
||||
"Embedding-V1": 0.002 * RMB,
|
||||
"bge-large-zh": 0.002 * RMB,
|
||||
"bge-large-en": 0.002 * RMB,
|
||||
"tao-8k": 0.002 * RMB,
|
||||
// https://ai.google.dev/pricing
|
||||
// https://cloud.google.com/vertex-ai/generative-ai/pricing
|
||||
// "gemma-2-2b-it": 0,
|
||||
// "gemma-2-9b-it": 0,
|
||||
// "gemma-2-27b-it": 0,
|
||||
"gemini-pro": 0.25 * MILLI_USD, // $0.00025 / 1k characters -> $0.001 / 1k tokens
|
||||
"gemini-1.0-pro": 0.125 * MILLI_USD,
|
||||
"gemini-1.5-pro": 1.25 * MILLI_USD,
|
||||
"gemini-1.5-pro-001": 1.25 * MILLI_USD,
|
||||
"gemini-1.5-pro-experimental": 1.25 * MILLI_USD,
|
||||
"gemini-1.5-flash": 0.075 * MILLI_USD,
|
||||
"gemini-1.5-flash-001": 0.075 * MILLI_USD,
|
||||
"gemini-1.5-flash-8b": 0.0375 * MILLI_USD,
|
||||
"gemini-2.0-flash-exp": 0.075 * MILLI_USD,
|
||||
"gemini-2.0-flash": 0.15 * MILLI_USD,
|
||||
"gemini-2.0-flash-001": 0.15 * MILLI_USD,
|
||||
"gemini-2.0-flash-lite-preview-02-05": 0.075 * MILLI_USD,
|
||||
"gemini-2.0-flash-thinking-exp-01-21": 0.075 * MILLI_USD,
|
||||
"gemini-2.0-pro-exp-02-05": 1.25 * MILLI_USD,
|
||||
"aqa": 1,
|
||||
// https://open.bigmodel.cn/pricing
|
||||
"glm-zero-preview": 0.01 * RMB,
|
||||
"glm-4-plus": 0.05 * RMB,
|
||||
"glm-4-0520": 0.1 * RMB,
|
||||
"glm-4-airx": 0.01 * RMB,
|
||||
"glm-4-air": 0.0005 * RMB,
|
||||
"glm-4-long": 0.001 * RMB,
|
||||
"glm-4-flashx": 0.0001 * RMB,
|
||||
"glm-4-flash": 0,
|
||||
"glm-4": 0.1 * RMB, // deprecated model, available until 2025/06
|
||||
"glm-3-turbo": 0.001 * RMB, // deprecated model, available until 2025/06
|
||||
"glm-4v-plus": 0.004 * RMB,
|
||||
"glm-4v": 0.05 * RMB,
|
||||
"glm-4v-flash": 0,
|
||||
"cogview-3-plus": 0.06 * RMB,
|
||||
"cogview-3": 0.1 * RMB,
|
||||
"cogview-3-flash": 0,
|
||||
"cogviewx": 0.5 * RMB,
|
||||
"cogviewx-flash": 0,
|
||||
"charglm-4": 0.001 * RMB,
|
||||
"emohaa": 0.015 * RMB,
|
||||
"codegeex-4": 0.0001 * RMB,
|
||||
"embedding-2": 0.0005 * RMB,
|
||||
"embedding-3": 0.0005 * RMB,
|
||||
// https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
|
||||
"qwen-turbo": 0.0003 * RMB,
|
||||
"qwen-turbo-latest": 0.0003 * RMB,
|
||||
"qwen-plus": 0.0008 * RMB,
|
||||
"qwen-plus-latest": 0.0008 * RMB,
|
||||
"qwen-max": 0.0024 * RMB,
|
||||
"qwen-max-latest": 0.0024 * RMB,
|
||||
"qwen-max-longcontext": 0.0005 * RMB,
|
||||
"qwen-vl-max": 0.003 * RMB,
|
||||
"qwen-vl-max-latest": 0.003 * RMB,
|
||||
"qwen-vl-plus": 0.0015 * RMB,
|
||||
"qwen-vl-plus-latest": 0.0015 * RMB,
|
||||
"qwen-vl-ocr": 0.005 * RMB,
|
||||
"qwen-vl-ocr-latest": 0.005 * RMB,
|
||||
"qwen-audio-turbo": 1.4286,
|
||||
"qwen-math-plus": 0.004 * RMB,
|
||||
"qwen-math-plus-latest": 0.004 * RMB,
|
||||
"qwen-math-turbo": 0.002 * RMB,
|
||||
"qwen-math-turbo-latest": 0.002 * RMB,
|
||||
"qwen-coder-plus": 0.0035 * RMB,
|
||||
"qwen-coder-plus-latest": 0.0035 * RMB,
|
||||
"qwen-coder-turbo": 0.002 * RMB,
|
||||
"qwen-coder-turbo-latest": 0.002 * RMB,
|
||||
"qwen-mt-plus": 0.015 * RMB,
|
||||
"qwen-mt-turbo": 0.001 * RMB,
|
||||
"qwq-32b-preview": 0.002 * RMB,
|
||||
"qwen2.5-72b-instruct": 0.004 * RMB,
|
||||
"qwen2.5-32b-instruct": 0.03 * RMB,
|
||||
"qwen2.5-14b-instruct": 0.001 * RMB,
|
||||
"qwen2.5-7b-instruct": 0.0005 * RMB,
|
||||
"qwen2.5-3b-instruct": 0.006 * RMB,
|
||||
"qwen2.5-1.5b-instruct": 0.0003 * RMB,
|
||||
"qwen2.5-0.5b-instruct": 0.0003 * RMB,
|
||||
"qwen2-72b-instruct": 0.004 * RMB,
|
||||
"qwen2-57b-a14b-instruct": 0.0035 * RMB,
|
||||
"qwen2-7b-instruct": 0.001 * RMB,
|
||||
"qwen2-1.5b-instruct": 0.001 * RMB,
|
||||
"qwen2-0.5b-instruct": 0.001 * RMB,
|
||||
"qwen1.5-110b-chat": 0.007 * RMB,
|
||||
"qwen1.5-72b-chat": 0.005 * RMB,
|
||||
"qwen1.5-32b-chat": 0.0035 * RMB,
|
||||
"qwen1.5-14b-chat": 0.002 * RMB,
|
||||
"qwen1.5-7b-chat": 0.001 * RMB,
|
||||
"qwen1.5-1.8b-chat": 0.001 * RMB,
|
||||
"qwen1.5-0.5b-chat": 0.001 * RMB,
|
||||
"qwen-72b-chat": 0.02 * RMB,
|
||||
"qwen-14b-chat": 0.008 * RMB,
|
||||
"qwen-7b-chat": 0.006 * RMB,
|
||||
"qwen-1.8b-chat": 0.006 * RMB,
|
||||
"qwen-1.8b-longcontext-chat": 0.006 * RMB,
|
||||
"qvq-72b-preview": 0.012 * RMB,
|
||||
"qwen2.5-vl-72b-instruct": 0.016 * RMB,
|
||||
"qwen2.5-vl-7b-instruct": 0.002 * RMB,
|
||||
"qwen2.5-vl-3b-instruct": 0.0012 * RMB,
|
||||
"qwen2-vl-7b-instruct": 0.016 * RMB,
|
||||
"qwen2-vl-2b-instruct": 0.002 * RMB,
|
||||
"qwen-vl-v1": 0.002 * RMB,
|
||||
"qwen-vl-chat-v1": 0.002 * RMB,
|
||||
"qwen2-audio-instruct": 0.002 * RMB,
|
||||
"qwen-audio-chat": 0.002 * RMB,
|
||||
"qwen2.5-math-72b-instruct": 0.004 * RMB,
|
||||
"qwen2.5-math-7b-instruct": 0.001 * RMB,
|
||||
"qwen2.5-math-1.5b-instruct": 0.001 * RMB,
|
||||
"qwen2-math-72b-instruct": 0.004 * RMB,
|
||||
"qwen2-math-7b-instruct": 0.001 * RMB,
|
||||
"qwen2-math-1.5b-instruct": 0.001 * RMB,
|
||||
"qwen2.5-coder-32b-instruct": 0.002 * RMB,
|
||||
"qwen2.5-coder-14b-instruct": 0.002 * RMB,
|
||||
"qwen2.5-coder-7b-instruct": 0.001 * RMB,
|
||||
"qwen2.5-coder-3b-instruct": 0.001 * RMB,
|
||||
"qwen2.5-coder-1.5b-instruct": 0.001 * RMB,
|
||||
"qwen2.5-coder-0.5b-instruct": 0.001 * RMB,
|
||||
"text-embedding-v1": 0.0007 * RMB, // ¥0.0007 / 1k tokens
|
||||
"text-embedding-v3": 0.0007 * RMB,
|
||||
"text-embedding-v2": 0.0007 * RMB,
|
||||
"text-embedding-async-v2": 0.0007 * RMB,
|
||||
"text-embedding-async-v1": 0.0007 * RMB,
|
||||
"ali-stable-diffusion-xl": 8.00,
|
||||
"ali-stable-diffusion-v1.5": 8.00,
|
||||
"wanx-v1": 8.00,
|
||||
"deepseek-r1": 0.002 * RMB,
|
||||
"deepseek-v3": 0.001 * RMB,
|
||||
"deepseek-r1-distill-qwen-1.5b": 0.001 * RMB,
|
||||
"deepseek-r1-distill-qwen-7b": 0.0005 * RMB,
|
||||
"deepseek-r1-distill-qwen-14b": 0.001 * RMB,
|
||||
"deepseek-r1-distill-qwen-32b": 0.002 * RMB,
|
||||
"deepseek-r1-distill-llama-8b": 0.0005 * RMB,
|
||||
"deepseek-r1-distill-llama-70b": 0.004 * RMB,
|
||||
"SparkDesk": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.1-128K": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.5-32K": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v4.0": 1.2858, // ¥0.018 / 1k tokens
|
||||
"360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens
|
||||
"embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
"embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
"semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
// https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
|
||||
"hunyuan-turbo": 0.015 * RMB,
|
||||
"hunyuan-large": 0.004 * RMB,
|
||||
"hunyuan-large-longcontext": 0.006 * RMB,
|
||||
"hunyuan-standard": 0.0008 * RMB,
|
||||
"hunyuan-standard-256K": 0.0005 * RMB,
|
||||
"hunyuan-translation-lite": 0.005 * RMB,
|
||||
"hunyuan-role": 0.004 * RMB,
|
||||
"hunyuan-functioncall": 0.004 * RMB,
|
||||
"hunyuan-code": 0.004 * RMB,
|
||||
"hunyuan-turbo-vision": 0.08 * RMB,
|
||||
"hunyuan-vision": 0.018 * RMB,
|
||||
"hunyuan-embedding": 0.0007 * RMB,
|
||||
// https://platform.moonshot.cn/pricing
|
||||
"moonshot-v1-8k": 0.012 * RMB,
|
||||
"moonshot-v1-32k": 0.024 * RMB,
|
||||
"moonshot-v1-128k": 0.06 * RMB,
|
||||
// https://platform.baichuan-ai.com/price
|
||||
"Baichuan2-Turbo": 0.008 * RMB,
|
||||
"Baichuan2-Turbo-192k": 0.016 * RMB,
|
||||
"Baichuan2-53B": 0.02 * RMB,
|
||||
// https://api.minimax.chat/document/price
|
||||
"abab6.5-chat": 0.03 * RMB,
|
||||
"abab6.5s-chat": 0.01 * RMB,
|
||||
"abab6-chat": 0.1 * RMB,
|
||||
"abab5.5-chat": 0.015 * RMB,
|
||||
"abab5.5s-chat": 0.005 * RMB,
|
||||
// https://docs.mistral.ai/platform/pricing/
|
||||
"open-mistral-7b": 0.25 / 1000 * USD,
|
||||
"open-mixtral-8x7b": 0.7 / 1000 * USD,
|
||||
"mistral-small-latest": 2.0 / 1000 * USD,
|
||||
"mistral-medium-latest": 2.7 / 1000 * USD,
|
||||
"mistral-large-latest": 8.0 / 1000 * USD,
|
||||
"mistral-embed": 0.1 / 1000 * USD,
|
||||
// https://wow.groq.com/#:~:text=inquiries%C2%A0here.-,Model,-Current%20Speed
|
||||
"gemma-7b-it": 0.07 / 1000000 * USD,
|
||||
"gemma2-9b-it": 0.20 / 1000000 * USD,
|
||||
"llama-3.1-70b-versatile": 0.59 / 1000000 * USD,
|
||||
"llama-3.1-8b-instant": 0.05 / 1000000 * USD,
|
||||
"llama-3.2-11b-text-preview": 0.05 / 1000000 * USD,
|
||||
"llama-3.2-11b-vision-preview": 0.05 / 1000000 * USD,
|
||||
"llama-3.2-1b-preview": 0.05 / 1000000 * USD,
|
||||
"llama-3.2-3b-preview": 0.05 / 1000000 * USD,
|
||||
"llama-3.2-90b-text-preview": 0.59 / 1000000 * USD,
|
||||
"llama-guard-3-8b": 0.05 / 1000000 * USD,
|
||||
"llama3-70b-8192": 0.59 / 1000000 * USD,
|
||||
"llama3-8b-8192": 0.05 / 1000000 * USD,
|
||||
"llama3-groq-70b-8192-tool-use-preview": 0.89 / 1000000 * USD,
|
||||
"llama3-groq-8b-8192-tool-use-preview": 0.19 / 1000000 * USD,
|
||||
"mixtral-8x7b-32768": 0.24 / 1000000 * USD,
|
||||
|
||||
// https://platform.lingyiwanwu.com/docs#-计费单元
|
||||
"yi-34b-chat-0205": 2.5 / 1000 * RMB,
|
||||
"yi-34b-chat-200k": 12.0 / 1000 * RMB,
|
||||
"yi-vl-plus": 6.0 / 1000 * RMB,
|
||||
// https://platform.stepfun.com/docs/pricing/details
|
||||
"step-1-8k": 0.005 / 1000 * RMB,
|
||||
"step-1-32k": 0.015 / 1000 * RMB,
|
||||
"step-1-128k": 0.040 / 1000 * RMB,
|
||||
"step-1-256k": 0.095 / 1000 * RMB,
|
||||
"step-1-flash": 0.001 / 1000 * RMB,
|
||||
"step-2-16k": 0.038 / 1000 * RMB,
|
||||
"step-1v-8k": 0.005 / 1000 * RMB,
|
||||
"step-1v-32k": 0.015 / 1000 * RMB,
|
||||
// aws llama3 https://aws.amazon.com/cn/bedrock/pricing/
|
||||
"llama3-8b-8192(33)": 0.0003 / 0.002, // $0.0003 / 1K tokens
|
||||
"llama3-70b-8192(33)": 0.00265 / 0.002, // $0.00265 / 1K tokens
|
||||
// https://cohere.com/pricing
|
||||
"command": 0.5,
|
||||
"command-nightly": 0.5,
|
||||
"command-light": 0.5,
|
||||
"command-light-nightly": 0.5,
|
||||
"command-r": 0.5 / 1000 * USD,
|
||||
"command-r-plus": 3.0 / 1000 * USD,
|
||||
// https://platform.deepseek.com/api-docs/pricing/
|
||||
"deepseek-chat": 0.14 * MILLI_USD,
|
||||
"deepseek-reasoner": 0.55 * MILLI_USD,
|
||||
// https://www.deepl.com/pro?cta=header-prices
|
||||
"deepl-zh": 25.0 / 1000 * USD,
|
||||
"deepl-en": 25.0 / 1000 * USD,
|
||||
"deepl-ja": 25.0 / 1000 * USD,
|
||||
// https://console.x.ai/
|
||||
"grok-beta": 5.0 / 1000 * USD,
|
||||
// replicate charges based on the number of generated images
|
||||
// https://replicate.com/pricing
|
||||
"black-forest-labs/flux-1.1-pro": 0.04 * USD,
|
||||
"black-forest-labs/flux-1.1-pro-ultra": 0.06 * USD,
|
||||
"black-forest-labs/flux-canny-dev": 0.025 * USD,
|
||||
"black-forest-labs/flux-canny-pro": 0.05 * USD,
|
||||
"black-forest-labs/flux-depth-dev": 0.025 * USD,
|
||||
"black-forest-labs/flux-depth-pro": 0.05 * USD,
|
||||
"black-forest-labs/flux-dev": 0.025 * USD,
|
||||
"black-forest-labs/flux-dev-lora": 0.032 * USD,
|
||||
"black-forest-labs/flux-fill-dev": 0.04 * USD,
|
||||
"black-forest-labs/flux-fill-pro": 0.05 * USD,
|
||||
"black-forest-labs/flux-pro": 0.055 * USD,
|
||||
"black-forest-labs/flux-redux-dev": 0.025 * USD,
|
||||
"black-forest-labs/flux-redux-schnell": 0.003 * USD,
|
||||
"black-forest-labs/flux-schnell": 0.003 * USD,
|
||||
"black-forest-labs/flux-schnell-lora": 0.02 * USD,
|
||||
"ideogram-ai/ideogram-v2": 0.08 * USD,
|
||||
"ideogram-ai/ideogram-v2-turbo": 0.05 * USD,
|
||||
"recraft-ai/recraft-v3": 0.04 * USD,
|
||||
"recraft-ai/recraft-v3-svg": 0.08 * USD,
|
||||
"stability-ai/stable-diffusion-3": 0.035 * USD,
|
||||
"stability-ai/stable-diffusion-3.5-large": 0.065 * USD,
|
||||
"stability-ai/stable-diffusion-3.5-large-turbo": 0.04 * USD,
|
||||
"stability-ai/stable-diffusion-3.5-medium": 0.035 * USD,
|
||||
// replicate chat models
|
||||
"ibm-granite/granite-20b-code-instruct-8k": 0.100 * USD,
|
||||
"ibm-granite/granite-3.0-2b-instruct": 0.030 * USD,
|
||||
"ibm-granite/granite-3.0-8b-instruct": 0.050 * USD,
|
||||
"ibm-granite/granite-8b-code-instruct-128k": 0.050 * USD,
|
||||
"meta/llama-2-13b": 0.100 * USD,
|
||||
"meta/llama-2-13b-chat": 0.100 * USD,
|
||||
"meta/llama-2-70b": 0.650 * USD,
|
||||
"meta/llama-2-70b-chat": 0.650 * USD,
|
||||
"meta/llama-2-7b": 0.050 * USD,
|
||||
"meta/llama-2-7b-chat": 0.050 * USD,
|
||||
"meta/meta-llama-3.1-405b-instruct": 9.500 * USD,
|
||||
"meta/meta-llama-3-70b": 0.650 * USD,
|
||||
"meta/meta-llama-3-70b-instruct": 0.650 * USD,
|
||||
"meta/meta-llama-3-8b": 0.050 * USD,
|
||||
"meta/meta-llama-3-8b-instruct": 0.050 * USD,
|
||||
"mistralai/mistral-7b-instruct-v0.2": 0.050 * USD,
|
||||
"mistralai/mistral-7b-v0.1": 0.050 * USD,
|
||||
"mistralai/mixtral-8x7b-instruct-v0.1": 0.300 * USD,
|
||||
//https://openrouter.ai/models
|
||||
"01-ai/yi-large": 1.5,
|
||||
"aetherwiing/mn-starcannon-12b": 0.6,
|
||||
"ai21/jamba-1-5-large": 4.0,
|
||||
"ai21/jamba-1-5-mini": 0.2,
|
||||
"ai21/jamba-instruct": 0.35,
|
||||
"aion-labs/aion-1.0": 6.0,
|
||||
"aion-labs/aion-1.0-mini": 1.2,
|
||||
"aion-labs/aion-rp-llama-3.1-8b": 0.1,
|
||||
"allenai/llama-3.1-tulu-3-405b": 5.0,
|
||||
"alpindale/goliath-120b": 4.6875,
|
||||
"alpindale/magnum-72b": 1.125,
|
||||
"amazon/nova-lite-v1": 0.12,
|
||||
"amazon/nova-micro-v1": 0.07,
|
||||
"amazon/nova-pro-v1": 1.6,
|
||||
"anthracite-org/magnum-v2-72b": 1.5,
|
||||
"anthracite-org/magnum-v4-72b": 1.125,
|
||||
"anthropic/claude-2": 12.0,
|
||||
"anthropic/claude-2.0": 12.0,
|
||||
"anthropic/claude-2.0:beta": 12.0,
|
||||
"anthropic/claude-2.1": 12.0,
|
||||
"anthropic/claude-2.1:beta": 12.0,
|
||||
"anthropic/claude-2:beta": 12.0,
|
||||
"anthropic/claude-3-haiku": 0.625,
|
||||
"anthropic/claude-3-haiku:beta": 0.625,
|
||||
"anthropic/claude-3-opus": 37.5,
|
||||
"anthropic/claude-3-opus:beta": 37.5,
|
||||
"anthropic/claude-3-sonnet": 7.5,
|
||||
"anthropic/claude-3-sonnet:beta": 7.5,
|
||||
"anthropic/claude-3.5-haiku": 2.0,
|
||||
"anthropic/claude-3.5-haiku-20241022": 2.0,
|
||||
"anthropic/claude-3.5-haiku-20241022:beta": 2.0,
|
||||
"anthropic/claude-3.5-haiku:beta": 2.0,
|
||||
"anthropic/claude-3.5-sonnet": 7.5,
|
||||
"anthropic/claude-3.5-sonnet-20240620": 7.5,
|
||||
"anthropic/claude-3.5-sonnet-20240620:beta": 7.5,
|
||||
"anthropic/claude-3.5-sonnet:beta": 7.5,
|
||||
"cognitivecomputations/dolphin-mixtral-8x22b": 0.45,
|
||||
"cognitivecomputations/dolphin-mixtral-8x7b": 0.25,
|
||||
"cohere/command": 0.95,
|
||||
"cohere/command-r": 0.7125,
|
||||
"cohere/command-r-03-2024": 0.7125,
|
||||
"cohere/command-r-08-2024": 0.285,
|
||||
"cohere/command-r-plus": 7.125,
|
||||
"cohere/command-r-plus-04-2024": 7.125,
|
||||
"cohere/command-r-plus-08-2024": 4.75,
|
||||
"cohere/command-r7b-12-2024": 0.075,
|
||||
"databricks/dbrx-instruct": 0.6,
|
||||
"deepseek/deepseek-chat": 0.445,
|
||||
"deepseek/deepseek-chat-v2.5": 1.0,
|
||||
"deepseek/deepseek-chat:free": 0.0,
|
||||
"deepseek/deepseek-r1": 1.2,
|
||||
"deepseek/deepseek-r1-distill-llama-70b": 0.345,
|
||||
"deepseek/deepseek-r1-distill-llama-70b:free": 0.0,
|
||||
"deepseek/deepseek-r1-distill-llama-8b": 0.02,
|
||||
"deepseek/deepseek-r1-distill-qwen-1.5b": 0.09,
|
||||
"deepseek/deepseek-r1-distill-qwen-14b": 0.075,
|
||||
"deepseek/deepseek-r1-distill-qwen-32b": 0.09,
|
||||
"deepseek/deepseek-r1:free": 0.0,
|
||||
"eva-unit-01/eva-llama-3.33-70b": 3.0,
|
||||
"eva-unit-01/eva-qwen-2.5-32b": 1.7,
|
||||
"eva-unit-01/eva-qwen-2.5-72b": 3.0,
|
||||
"google/gemini-2.0-flash-001": 0.2,
|
||||
"google/gemini-2.0-flash-exp:free": 0.0,
|
||||
"google/gemini-2.0-flash-lite-preview-02-05:free": 0.0,
|
||||
"google/gemini-2.0-flash-thinking-exp-1219:free": 0.0,
|
||||
"google/gemini-2.0-flash-thinking-exp:free": 0.0,
|
||||
"google/gemini-2.0-pro-exp-02-05:free": 0.0,
|
||||
"google/gemini-exp-1206:free": 0.0,
|
||||
"google/gemini-flash-1.5": 0.15,
|
||||
"google/gemini-flash-1.5-8b": 0.075,
|
||||
"google/gemini-flash-1.5-8b-exp": 0.0,
|
||||
"google/gemini-pro": 0.75,
|
||||
"google/gemini-pro-1.5": 2.5,
|
||||
"google/gemini-pro-vision": 0.75,
|
||||
"google/gemma-2-27b-it": 0.135,
|
||||
"google/gemma-2-9b-it": 0.03,
|
||||
"google/gemma-2-9b-it:free": 0.0,
|
||||
"google/gemma-7b-it": 0.075,
|
||||
"google/learnlm-1.5-pro-experimental:free": 0.0,
|
||||
"google/palm-2-chat-bison": 1.0,
|
||||
"google/palm-2-chat-bison-32k": 1.0,
|
||||
"google/palm-2-codechat-bison": 1.0,
|
||||
"google/palm-2-codechat-bison-32k": 1.0,
|
||||
"gryphe/mythomax-l2-13b": 0.0325,
|
||||
"gryphe/mythomax-l2-13b:free": 0.0,
|
||||
"huggingfaceh4/zephyr-7b-beta:free": 0.0,
|
||||
"infermatic/mn-inferor-12b": 0.6,
|
||||
"inflection/inflection-3-pi": 5.0,
|
||||
"inflection/inflection-3-productivity": 5.0,
|
||||
"jondurbin/airoboros-l2-70b": 0.25,
|
||||
"liquid/lfm-3b": 0.01,
|
||||
"liquid/lfm-40b": 0.075,
|
||||
"liquid/lfm-7b": 0.005,
|
||||
"mancer/weaver": 1.125,
|
||||
"meta-llama/llama-2-13b-chat": 0.11,
|
||||
"meta-llama/llama-2-70b-chat": 0.45,
|
||||
"meta-llama/llama-3-70b-instruct": 0.2,
|
||||
"meta-llama/llama-3-8b-instruct": 0.03,
|
||||
"meta-llama/llama-3-8b-instruct:free": 0.0,
|
||||
"meta-llama/llama-3.1-405b": 1.0,
|
||||
"meta-llama/llama-3.1-405b-instruct": 0.4,
|
||||
"meta-llama/llama-3.1-70b-instruct": 0.15,
|
||||
"meta-llama/llama-3.1-8b-instruct": 0.025,
|
||||
"meta-llama/llama-3.2-11b-vision-instruct": 0.0275,
|
||||
"meta-llama/llama-3.2-11b-vision-instruct:free": 0.0,
|
||||
"meta-llama/llama-3.2-1b-instruct": 0.005,
|
||||
"meta-llama/llama-3.2-3b-instruct": 0.0125,
|
||||
"meta-llama/llama-3.2-90b-vision-instruct": 0.8,
|
||||
"meta-llama/llama-3.3-70b-instruct": 0.15,
|
||||
"meta-llama/llama-3.3-70b-instruct:free": 0.0,
|
||||
"meta-llama/llama-guard-2-8b": 0.1,
|
||||
"microsoft/phi-3-medium-128k-instruct": 0.5,
|
||||
"microsoft/phi-3-medium-128k-instruct:free": 0.0,
|
||||
"microsoft/phi-3-mini-128k-instruct": 0.05,
|
||||
"microsoft/phi-3-mini-128k-instruct:free": 0.0,
|
||||
"microsoft/phi-3.5-mini-128k-instruct": 0.05,
|
||||
"microsoft/phi-4": 0.07,
|
||||
"microsoft/wizardlm-2-7b": 0.035,
|
||||
"microsoft/wizardlm-2-8x22b": 0.25,
|
||||
"minimax/minimax-01": 0.55,
|
||||
"mistralai/codestral-2501": 0.45,
|
||||
"mistralai/codestral-mamba": 0.125,
|
||||
"mistralai/ministral-3b": 0.02,
|
||||
"mistralai/ministral-8b": 0.05,
|
||||
"mistralai/mistral-7b-instruct": 0.0275,
|
||||
"mistralai/mistral-7b-instruct-v0.1": 0.1,
|
||||
"mistralai/mistral-7b-instruct-v0.3": 0.0275,
|
||||
"mistralai/mistral-7b-instruct:free": 0.0,
|
||||
"mistralai/mistral-large": 3.0,
|
||||
"mistralai/mistral-large-2407": 3.0,
|
||||
"mistralai/mistral-large-2411": 3.0,
|
||||
"mistralai/mistral-medium": 4.05,
|
||||
"mistralai/mistral-nemo": 0.04,
|
||||
"mistralai/mistral-nemo:free": 0.0,
|
||||
"mistralai/mistral-small": 0.3,
|
||||
"mistralai/mistral-small-24b-instruct-2501": 0.07,
|
||||
"mistralai/mistral-small-24b-instruct-2501:free": 0.0,
|
||||
"mistralai/mistral-tiny": 0.125,
|
||||
"mistralai/mixtral-8x22b-instruct": 0.45,
|
||||
"mistralai/mixtral-8x7b": 0.3,
|
||||
"mistralai/mixtral-8x7b-instruct": 0.12,
|
||||
"mistralai/pixtral-12b": 0.05,
|
||||
"mistralai/pixtral-large-2411": 3.0,
|
||||
"neversleep/llama-3-lumimaid-70b": 2.25,
|
||||
"neversleep/llama-3-lumimaid-8b": 0.5625,
|
||||
"neversleep/llama-3-lumimaid-8b:extended": 0.5625,
|
||||
"neversleep/llama-3.1-lumimaid-70b": 2.25,
|
||||
"neversleep/llama-3.1-lumimaid-8b": 0.5625,
|
||||
"neversleep/noromaid-20b": 1.125,
|
||||
"nothingiisreal/mn-celeste-12b": 0.6,
|
||||
"nousresearch/hermes-2-pro-llama-3-8b": 0.02,
|
||||
"nousresearch/hermes-3-llama-3.1-405b": 0.4,
|
||||
"nousresearch/hermes-3-llama-3.1-70b": 0.15,
|
||||
"nousresearch/nous-hermes-2-mixtral-8x7b-dpo": 0.3,
|
||||
"nousresearch/nous-hermes-llama2-13b": 0.085,
|
||||
"nvidia/llama-3.1-nemotron-70b-instruct": 0.15,
|
||||
"nvidia/llama-3.1-nemotron-70b-instruct:free": 0.0,
|
||||
"openai/chatgpt-4o-latest": 7.5,
|
||||
"openai/gpt-3.5-turbo": 0.75,
|
||||
"openai/gpt-3.5-turbo-0125": 0.75,
|
||||
"openai/gpt-3.5-turbo-0613": 1.0,
|
||||
"openai/gpt-3.5-turbo-1106": 1.0,
|
||||
"openai/gpt-3.5-turbo-16k": 2.0,
|
||||
"openai/gpt-3.5-turbo-instruct": 1.0,
|
||||
"openai/gpt-4": 30.0,
|
||||
"openai/gpt-4-0314": 30.0,
|
||||
"openai/gpt-4-1106-preview": 15.0,
|
||||
"openai/gpt-4-32k": 60.0,
|
||||
"openai/gpt-4-32k-0314": 60.0,
|
||||
"openai/gpt-4-turbo": 15.0,
|
||||
"openai/gpt-4-turbo-preview": 15.0,
|
||||
"openai/gpt-4o": 5.0,
|
||||
"openai/gpt-4o-2024-05-13": 7.5,
|
||||
"openai/gpt-4o-2024-08-06": 5.0,
|
||||
"openai/gpt-4o-2024-11-20": 5.0,
|
||||
"openai/gpt-4o-mini": 0.3,
|
||||
"openai/gpt-4o-mini-2024-07-18": 0.3,
|
||||
"openai/gpt-4o:extended": 9.0,
|
||||
"openai/o1": 30.0,
|
||||
"openai/o1-mini": 2.2,
|
||||
"openai/o1-mini-2024-09-12": 2.2,
|
||||
"openai/o1-preview": 30.0,
|
||||
"openai/o1-preview-2024-09-12": 30.0,
|
||||
"openai/o3-mini": 2.2,
|
||||
"openai/o3-mini-high": 2.2,
|
||||
"openchat/openchat-7b": 0.0275,
|
||||
"openchat/openchat-7b:free": 0.0,
|
||||
"openrouter/auto": -500000.0,
|
||||
"perplexity/llama-3.1-sonar-huge-128k-online": 2.5,
|
||||
"perplexity/llama-3.1-sonar-large-128k-chat": 0.5,
|
||||
"perplexity/llama-3.1-sonar-large-128k-online": 0.5,
|
||||
"perplexity/llama-3.1-sonar-small-128k-chat": 0.1,
|
||||
"perplexity/llama-3.1-sonar-small-128k-online": 0.1,
|
||||
"perplexity/sonar": 0.5,
|
||||
"perplexity/sonar-reasoning": 2.5,
|
||||
"pygmalionai/mythalion-13b": 0.6,
|
||||
"qwen/qvq-72b-preview": 0.25,
|
||||
"qwen/qwen-2-72b-instruct": 0.45,
|
||||
"qwen/qwen-2-7b-instruct": 0.027,
|
||||
"qwen/qwen-2-7b-instruct:free": 0.0,
|
||||
"qwen/qwen-2-vl-72b-instruct": 0.2,
|
||||
"qwen/qwen-2-vl-7b-instruct": 0.05,
|
||||
"qwen/qwen-2.5-72b-instruct": 0.2,
|
||||
"qwen/qwen-2.5-7b-instruct": 0.025,
|
||||
"qwen/qwen-2.5-coder-32b-instruct": 0.08,
|
||||
"qwen/qwen-max": 3.2,
|
||||
"qwen/qwen-plus": 0.6,
|
||||
"qwen/qwen-turbo": 0.1,
|
||||
"qwen/qwen-vl-plus:free": 0.0,
|
||||
"qwen/qwen2.5-vl-72b-instruct:free": 0.0,
|
||||
"qwen/qwq-32b-preview": 0.09,
|
||||
"raifle/sorcererlm-8x22b": 2.25,
|
||||
"sao10k/fimbulvetr-11b-v2": 0.6,
|
||||
"sao10k/l3-euryale-70b": 0.4,
|
||||
"sao10k/l3-lunaris-8b": 0.03,
|
||||
"sao10k/l3.1-70b-hanami-x1": 1.5,
|
||||
"sao10k/l3.1-euryale-70b": 0.4,
|
||||
"sao10k/l3.3-euryale-70b": 0.4,
|
||||
"sophosympatheia/midnight-rose-70b": 0.4,
|
||||
"sophosympatheia/rogue-rose-103b-v0.2:free": 0.0,
|
||||
"teknium/openhermes-2.5-mistral-7b": 0.085,
|
||||
"thedrummer/rocinante-12b": 0.25,
|
||||
"thedrummer/unslopnemo-12b": 0.25,
|
||||
"undi95/remm-slerp-l2-13b": 0.6,
|
||||
"undi95/toppy-m-7b": 0.035,
|
||||
"undi95/toppy-m-7b:free": 0.0,
|
||||
"x-ai/grok-2-1212": 5.0,
|
||||
"x-ai/grok-2-vision-1212": 5.0,
|
||||
"x-ai/grok-beta": 7.5,
|
||||
"x-ai/grok-vision-beta": 7.5,
|
||||
"xwin-lm/xwin-lm-70b": 1.875,
|
||||
}
|
||||
|
||||
// CompletionRatio maps a model name — optionally qualified with a channel
// type as "name(channelType)" — to the price ratio of completion (output)
// tokens relative to prompt (input) tokens. Models not listed here fall back
// to the heuristics in GetCompletionRatio.
var CompletionRatio = map[string]float64{
	// aws llama3 (channel-type-qualified keys; "(33)" presumably is the AWS
	// channel type — confirm against the channel-type constants)
	"llama3-8b-8192(33)":  0.0006 / 0.0003,
	"llama3-70b-8192(33)": 0.0035 / 0.00265,
	// whisper
	"whisper-1": 0, // only count input tokens
	// deepseek
	"deepseek-chat":     0.28 / 0.14,
	"deepseek-reasoner": 2.19 / 0.55,
}
// Snapshots of the compiled-in ratio tables, captured once in init before
// any runtime overrides via UpdateModelRatioByJSONString /
// UpdateCompletionRatioByJSONString. They serve as fallback lookups and as
// the source for backfilling missing entries (see AddNewMissingRatio).
var (
	DefaultModelRatio      map[string]float64
	DefaultCompletionRatio map[string]float64
)
func init() {
|
||||
DefaultModelRatio = make(map[string]float64)
|
||||
for k, v := range ModelRatio {
|
||||
DefaultModelRatio[k] = v
|
||||
}
|
||||
DefaultCompletionRatio = make(map[string]float64)
|
||||
for k, v := range CompletionRatio {
|
||||
DefaultCompletionRatio[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
func AddNewMissingRatio(oldRatio string) string {
|
||||
newRatio := make(map[string]float64)
|
||||
err := json.Unmarshal([]byte(oldRatio), &newRatio)
|
||||
if err != nil {
|
||||
logger.SysError("error unmarshalling old ratio: " + err.Error())
|
||||
return oldRatio
|
||||
}
|
||||
for k, v := range DefaultModelRatio {
|
||||
if _, ok := newRatio[k]; !ok {
|
||||
newRatio[k] = v
|
||||
}
|
||||
}
|
||||
jsonBytes, err := json.Marshal(newRatio)
|
||||
if err != nil {
|
||||
logger.SysError("error marshalling new ratio: " + err.Error())
|
||||
return oldRatio
|
||||
}
|
||||
return string(jsonBytes)
|
||||
}
|
||||
|
||||
func ModelRatio2JSONString() string {
|
||||
jsonBytes, err := json.Marshal(ModelRatio)
|
||||
if err != nil {
|
||||
logger.SysError("error marshalling model ratio: " + err.Error())
|
||||
}
|
||||
return string(jsonBytes)
|
||||
}
|
||||
|
||||
func UpdateModelRatioByJSONString(jsonStr string) error {
|
||||
modelRatioLock.Lock()
|
||||
defer modelRatioLock.Unlock()
|
||||
ModelRatio = make(map[string]float64)
|
||||
return json.Unmarshal([]byte(jsonStr), &ModelRatio)
|
||||
}
|
||||
|
||||
func GetModelRatio(name string, channelType int) float64 {
|
||||
modelRatioLock.RLock()
|
||||
defer modelRatioLock.RUnlock()
|
||||
if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
|
||||
name = strings.TrimSuffix(name, "-internet")
|
||||
}
|
||||
if strings.HasPrefix(name, "command-") && strings.HasSuffix(name, "-internet") {
|
||||
name = strings.TrimSuffix(name, "-internet")
|
||||
}
|
||||
model := fmt.Sprintf("%s(%d)", name, channelType)
|
||||
if ratio, ok := ModelRatio[model]; ok {
|
||||
return ratio
|
||||
}
|
||||
if ratio, ok := DefaultModelRatio[model]; ok {
|
||||
return ratio
|
||||
}
|
||||
if ratio, ok := ModelRatio[name]; ok {
|
||||
return ratio
|
||||
}
|
||||
if ratio, ok := DefaultModelRatio[name]; ok {
|
||||
return ratio
|
||||
}
|
||||
logger.SysError("model ratio not found: " + name)
|
||||
return 30
|
||||
}
|
||||
|
||||
func CompletionRatio2JSONString() string {
|
||||
jsonBytes, err := json.Marshal(CompletionRatio)
|
||||
if err != nil {
|
||||
logger.SysError("error marshalling completion ratio: " + err.Error())
|
||||
}
|
||||
return string(jsonBytes)
|
||||
}
|
||||
|
||||
func UpdateCompletionRatioByJSONString(jsonStr string) error {
|
||||
CompletionRatio = make(map[string]float64)
|
||||
return json.Unmarshal([]byte(jsonStr), &CompletionRatio)
|
||||
}
|
||||
|
||||
func GetCompletionRatio(name string, channelType int) float64 {
|
||||
if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
|
||||
name = strings.TrimSuffix(name, "-internet")
|
||||
}
|
||||
model := fmt.Sprintf("%s(%d)", name, channelType)
|
||||
if ratio, ok := CompletionRatio[model]; ok {
|
||||
return ratio
|
||||
}
|
||||
if ratio, ok := DefaultCompletionRatio[model]; ok {
|
||||
return ratio
|
||||
}
|
||||
if ratio, ok := CompletionRatio[name]; ok {
|
||||
return ratio
|
||||
}
|
||||
if ratio, ok := DefaultCompletionRatio[name]; ok {
|
||||
return ratio
|
||||
}
|
||||
if strings.HasPrefix(name, "gpt-3.5") {
|
||||
if name == "gpt-3.5-turbo" || strings.HasSuffix(name, "0125") {
|
||||
// https://openai.com/blog/new-embedding-models-and-api-updates
|
||||
// Updated GPT-3.5 Turbo model and lower pricing
|
||||
return 3
|
||||
}
|
||||
if strings.HasSuffix(name, "1106") {
|
||||
return 2
|
||||
}
|
||||
return 4.0 / 3.0
|
||||
}
|
||||
if strings.HasPrefix(name, "gpt-4") {
|
||||
if strings.HasPrefix(name, "gpt-4o") {
|
||||
if name == "gpt-4o-2024-05-13" {
|
||||
return 3
|
||||
}
|
||||
return 4
|
||||
}
|
||||
if strings.HasPrefix(name, "gpt-4-turbo") ||
|
||||
strings.HasSuffix(name, "preview") {
|
||||
return 3
|
||||
}
|
||||
return 2
|
||||
}
|
||||
// including o1, o1-preview, o1-mini
|
||||
if strings.HasPrefix(name, "o1") {
|
||||
return 4
|
||||
}
|
||||
if name == "chatgpt-4o-latest" {
|
||||
return 3
|
||||
}
|
||||
if strings.HasPrefix(name, "claude-3") {
|
||||
return 5
|
||||
}
|
||||
if strings.HasPrefix(name, "claude-") {
|
||||
return 3
|
||||
}
|
||||
if strings.HasPrefix(name, "mistral-") {
|
||||
return 3
|
||||
}
|
||||
if strings.HasPrefix(name, "gemini-") {
|
||||
return 3
|
||||
}
|
||||
if strings.HasPrefix(name, "deepseek-") {
|
||||
return 2
|
||||
}
|
||||
|
||||
switch name {
|
||||
case "llama2-70b-4096":
|
||||
return 0.8 / 0.64
|
||||
case "llama3-8b-8192":
|
||||
return 2
|
||||
case "llama3-70b-8192":
|
||||
return 0.79 / 0.59
|
||||
case "command", "command-light", "command-nightly", "command-light-nightly":
|
||||
return 2
|
||||
case "command-r":
|
||||
return 3
|
||||
case "command-r-plus":
|
||||
return 5
|
||||
case "grok-beta":
|
||||
return 3
|
||||
// Replicate Models
|
||||
// https://replicate.com/pricing
|
||||
case "ibm-granite/granite-20b-code-instruct-8k":
|
||||
return 5
|
||||
case "ibm-granite/granite-3.0-2b-instruct":
|
||||
return 8.333333333333334
|
||||
case "ibm-granite/granite-3.0-8b-instruct",
|
||||
"ibm-granite/granite-8b-code-instruct-128k":
|
||||
return 5
|
||||
case "meta/llama-2-13b",
|
||||
"meta/llama-2-13b-chat",
|
||||
"meta/llama-2-7b",
|
||||
"meta/llama-2-7b-chat",
|
||||
"meta/meta-llama-3-8b",
|
||||
"meta/meta-llama-3-8b-instruct":
|
||||
return 5
|
||||
case "meta/llama-2-70b",
|
||||
"meta/llama-2-70b-chat",
|
||||
"meta/meta-llama-3-70b",
|
||||
"meta/meta-llama-3-70b-instruct":
|
||||
return 2.750 / 0.650 // ≈4.230769
|
||||
case "meta/meta-llama-3.1-405b-instruct":
|
||||
return 1
|
||||
case "mistralai/mistral-7b-instruct-v0.2",
|
||||
"mistralai/mistral-7b-v0.1":
|
||||
return 5
|
||||
case "mistralai/mixtral-8x7b-instruct-v0.1":
|
||||
return 1.000 / 0.300 // ≈3.333333
|
||||
}
|
||||
|
||||
return 1
|
||||
}
|
||||
Reference in New Issue
Block a user