- feat: add local quota tracking for gemini tiers (Legacy/Pro/Ultra) - feat: implement PreCheckUsage in RateLimitService - feat: align gemini daily reset window with PST - fix: sticky session fallback logic
175 lines
6.8 KiB
YAML
175 lines
6.8 KiB
YAML
# Sub2API Configuration File
|
||
# Copy this file to /etc/sub2api/config.yaml and modify as needed
|
||
# Documentation: https://github.com/Wei-Shaw/sub2api
|
||
|
||
# =============================================================================
|
||
# Server Configuration
|
||
# =============================================================================
|
||
server:
  # Address the HTTP server binds to ("0.0.0.0" listens on all interfaces)
  host: "0.0.0.0"
  # TCP port the server listens on
  port: 8080
  # Runtime mode: "debug" for development, "release" for production
  mode: "release"
|
||
|
||
# =============================================================================
|
||
# Run Mode Configuration
|
||
# =============================================================================
|
||
# Run mode selects the feature set:
#   - "standard" (default): full SaaS features with billing/balance checks
#   - "simple" (internal use): hides SaaS features and skips billing/balance checks
run_mode: "standard"
|
||
|
||
# =============================================================================
# Gateway Configuration
# =============================================================================
# All gateway settings live in this single mapping. YAML does not allow a
# mapping key to be repeated: most parsers silently keep only the last
# occurrence and strict parsers reject the file, so do not add a second
# top-level `gateway:` section elsewhere in this file.
gateway:
  # Seconds to wait for upstream response headers (streaming body not affected)
  response_header_timeout: 300
  # Maximum request body size in bytes (default 100 MB)
  max_body_size: 104857600
  # Connection pool isolation strategy:
  #   - proxy: isolate per proxy; requests through the same proxy share a pool
  #     (suits few proxies, many accounts)
  #   - account: isolate per account; requests for the same account share a pool
  #     (suits few accounts that need strict isolation)
  #   - account_proxy: isolate per account + proxy pair (default, finest granularity)
  connection_pool_isolation: "account_proxy"
  # HTTP upstream connection pool settings (defaults for HTTP/2 + multi-proxy setups)
  max_idle_conns: 240
  max_idle_conns_per_host: 120
  max_conns_per_host: 240
  idle_conn_timeout_seconds: 300
  # Upstream connection pool client cache:
  #   - max_upstream_clients: maximum number of cached clients; once exceeded,
  #     the least recently used client is evicted
  #   - client_idle_ttl_seconds: idle threshold in seconds; a client idle longer
  #     than this with no active requests is reclaimed
  max_upstream_clients: 5000
  client_idle_ttl_seconds: 900
  # Concurrency slot expiration time (minutes)
  concurrency_slot_ttl_minutes: 15
  # Log upstream error response body summary (safe/truncated; does not log request content)
  log_upstream_error_body: false
  # Max bytes to log from upstream error body
  log_upstream_error_body_max_bytes: 2048
  # Auto inject anthropic-beta for API-key accounts when needed (default off)
  inject_beta_for_apikey: false
  # Allow failover on selected 400 errors (default off)
  failover_on_400: false

# =============================================================================
# Database Configuration (PostgreSQL)
# =============================================================================
database:
  host: "localhost"
  port: 5432
  user: "postgres"
  # Placeholder value: replace with the real database password
  password: "your_secure_password_here"
  dbname: "sub2api"
  # SSL mode: disable, require, verify-ca, verify-full
  sslmode: "disable"

# =============================================================================
# Redis Configuration
# =============================================================================
redis:
  host: "localhost"
  port: 6379
  # Leave empty if no password is set
  password: ""
  # Database number (0-15)
  db: 0

# =============================================================================
# JWT Configuration
# =============================================================================
jwt:
  # IMPORTANT: Change this to a random string in production!
  # Generate with: openssl rand -hex 32
  secret: "change-this-to-a-secure-random-string"
  # Token expiration time in hours
  expire_hour: 24

# =============================================================================
# Default Settings
# =============================================================================
default:
  # Initial admin account (created on first run)
  # IMPORTANT: "admin123" is a sample password; change it before first run
  admin_email: "admin@example.com"
  admin_password: "admin123"

  # Default settings for new users
  user_concurrency: 5  # Max concurrent requests per user
  user_balance: 0  # Initial balance for new users

  # API key settings
  api_key_prefix: "sk-"  # Prefix for generated API keys

  # Rate multiplier (affects billing calculation)
  rate_multiplier: 1.0

# =============================================================================
# Rate Limiting
# =============================================================================
rate_limit:
  # Cooldown time (in minutes) when upstream returns 529 (overloaded)
  overload_cooldown_minutes: 10

# =============================================================================
# Pricing Data Source (Optional)
# =============================================================================
pricing:
  # URL to fetch model pricing data (default: LiteLLM)
  remote_url: "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
  # Hash verification URL (optional)
  hash_url: ""
  # Local data directory for caching
  data_dir: "./data"
  # Fallback pricing file
  fallback_file: "./resources/model-pricing/model_prices_and_context_window.json"
  # Update interval in hours
  update_interval_hours: 24
  # Hash check interval in minutes
  hash_check_interval_minutes: 10
|
||
|
||
# =============================================================================
|
||
# Gemini OAuth (Required for Gemini accounts)
|
||
# =============================================================================
|
||
# Sub2API supports TWO Gemini OAuth modes:
|
||
#
|
||
# 1. Code Assist OAuth (requires a GCP project_id)
|
||
# - Uses: cloudcode-pa.googleapis.com (Code Assist API)
|
||
#
|
||
# 2. AI Studio OAuth (does not require a project_id)
|
||
# - Uses: generativelanguage.googleapis.com (AI Studio API)
|
||
#
|
||
# Default: Uses Gemini CLI's public OAuth credentials (same as Google's official CLI tool)
|
||
gemini:
  oauth:
    # Public OAuth credentials of the Gemini CLI; usable for both the
    # Code Assist and the AI Studio OAuth modes
    client_id: "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
    client_secret: "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"
    # Space-separated scopes (optional); leave empty to auto-select based on oauth_type
    scopes: ""
  quota:
    # Optional local quota simulation for Gemini Code Assist (local billing).
    # The numbers below drive UI progress and precheck scheduling only;
    # they are not official Google quotas.
    tiers:
      LEGACY:
        pro_rpd: 50
        flash_rpd: 1500
        cooldown_minutes: 30
      PRO:
        pro_rpd: 1500
        flash_rpd: 4000
        cooldown_minutes: 5
      ULTRA:
        pro_rpd: 2000
        # NOTE(review): 0 presumably means "no local limit" rather than
        # "no requests allowed" - confirm against the quota service.
        flash_rpd: 0
        cooldown_minutes: 5
|