# xinghuoapi/deploy/config.example.yaml

# Sub2API Configuration File
# Copy this file to /etc/sub2api/config.yaml and modify as needed
# Documentation: https://github.com/Wei-Shaw/sub2api

# =============================================================================
# Server Configuration
# =============================================================================
server:
  # Address the server binds to; 0.0.0.0 listens on every interface
  host: '0.0.0.0'
  # TCP port the server listens on
  port: 8080
  # Either "debug" (development) or "release" (production)
  mode: 'release'

# =============================================================================
# Run Mode Configuration
# =============================================================================
# Accepted values: "standard" (the default) or "simple" (intended for internal use)
#   standard - full SaaS feature set, including billing/balance checks
#   simple   - SaaS features are hidden and billing/balance checks are skipped
run_mode: 'standard'

# =============================================================================
# Gateway Configuration
# =============================================================================
# All gateway settings live in this single mapping. YAML mapping keys must be
# unique, so do not add a second top-level `gateway:` section elsewhere in
# this file.
gateway:
  # Timeout (in seconds) waiting for upstream response headers
  # (streaming body not affected)
  response_header_timeout: 300
  # Maximum request body size in bytes (default 100MB)
  max_body_size: 104857600
  # Connection pool isolation strategy:
  # - proxy: isolate by proxy; the same proxy shares one pool
  #   (suits few proxies, many accounts)
  # - account: isolate by account; the same account shares one pool
  #   (suits few accounts that need strict isolation)
  # - account_proxy: isolate by account + proxy combination
  #   (default, finest granularity)
  connection_pool_isolation: 'account_proxy'
  # HTTP upstream connection pool settings
  # (defaults for HTTP/2 + multi-proxy scenarios)
  max_idle_conns: 240
  max_idle_conns_per_host: 120
  max_conns_per_host: 240
  idle_conn_timeout_seconds: 300
  # Upstream connection pool client cache settings
  # max_upstream_clients: maximum number of cached clients; once exceeded,
  #   the least recently used clients are evicted
  # client_idle_ttl_seconds: idle reclaim threshold (seconds); a client is
  #   reclaimed once it exceeds this and has no active requests
  max_upstream_clients: 5000
  client_idle_ttl_seconds: 900
  # Concurrency slot expiration time (minutes)
  concurrency_slot_ttl_minutes: 15
  # Log upstream error response body summary
  # (safe/truncated; does not log request content)
  log_upstream_error_body: false
  # Max bytes to log from upstream error body
  log_upstream_error_body_max_bytes: 2048
  # Auto inject anthropic-beta for API-key accounts when needed (default off)
  inject_beta_for_apikey: false
  # Allow failover on selected 400 errors (default off)
  failover_on_400: false

# =============================================================================
# Database Configuration (PostgreSQL)
# =============================================================================
database:
  host: 'localhost'
  port: 5432
  user: 'postgres'
  # Placeholder: replace with the real database password
  password: 'your_secure_password_here'
  dbname: 'sub2api'
  # SSL mode: disable, require, verify-ca, verify-full
  sslmode: 'disable'

# =============================================================================
# Redis Configuration
# =============================================================================
redis:
  host: 'localhost'
  port: 6379
  # Leave empty if no password is set
  password: ''
  # Database number (0-15)
  db: 0

# =============================================================================
# JWT Configuration
# =============================================================================
jwt:
  # IMPORTANT: Change this to a random string in production!
  # Generate with: openssl rand -hex 32
  secret: 'change-this-to-a-secure-random-string'
  # Token expiration time in hours
  expire_hour: 24

# =============================================================================
# Default Settings
# =============================================================================
default:
  # Initial admin account (created on first run)
  # IMPORTANT: "admin123" is a weak example value; set a strong, unique
  # password before the first run.
  admin_email: 'admin@example.com'
  admin_password: 'admin123'

  # Default settings for new users
  # Max concurrent requests per user
  user_concurrency: 5
  # Initial balance for new users
  user_balance: 0

  # API key settings
  # Prefix for generated API keys
  api_key_prefix: 'sk-'

  # Rate multiplier (affects billing calculation)
  rate_multiplier: 1.0

# =============================================================================
# Rate Limiting
# =============================================================================
rate_limit:
  # Cooldown time (in minutes) when upstream returns 529 (overloaded)
  overload_cooldown_minutes: 10

# =============================================================================
# Pricing Data Source (Optional)
# =============================================================================
pricing:
  # URL to fetch model pricing data (default: LiteLLM)
  remote_url: 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
  # Hash verification URL (optional)
  hash_url: ''
  # Local data directory for caching
  data_dir: './data'
  # Fallback pricing file
  fallback_file: './resources/model-pricing/model_prices_and_context_window.json'
  # Update interval in hours
  update_interval_hours: 24
  # Hash check interval in minutes
  hash_check_interval_minutes: 10

# =============================================================================
# Gemini OAuth (Required for Gemini accounts)
# =============================================================================
# Sub2API supports TWO Gemini OAuth modes:
#
# 1. Code Assist OAuth (requires a GCP project_id)
#    - Uses: cloudcode-pa.googleapis.com (Code Assist API)
#
# 2. AI Studio OAuth (does not require a project_id)
#    - Uses: generativelanguage.googleapis.com (AI Studio API)
#
# Default: Uses Gemini CLI's public OAuth credentials (same as Google's official CLI tool)
gemini:
  oauth:
    # Public OAuth credentials of the Gemini CLI; usable for both the
    # Code Assist and the AI Studio modes
    client_id: '681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com'
    client_secret: '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'
    # Space-separated scopes (optional). When left empty, scopes are
    # auto-selected based on oauth_type.
    scopes: ''
  quota:
    # Optional local quota simulation for Gemini Code Assist (local billing).
    # The numbers below drive UI progress and precheck scheduling only; they
    # are not official Google quotas.
    tiers:
      LEGACY:
        pro_rpd: 50
        flash_rpd: 1500
        cooldown_minutes: 30
      PRO:
        pro_rpd: 1500
        flash_rpd: 4000
        cooldown_minutes: 5
      ULTRA:
        pro_rpd: 2000
        # NOTE(review): meaning of 0 here is not stated in this file - confirm
        flash_rpd: 0
        cooldown_minutes: 5