Merge branch 'Wei-Shaw:main' into main

2026-02-04 18:56:47 +08:00
parent 31fe017888 8f39754812
commit dd96ada3c6
90 changed files with 5674 additions and 687 deletions
--- a/backend/internal/domain/constants.go
+++ b/backend/internal/domain/constants.go
@@ -29,6 +29,7 @@ const (
 	AccountTypeOAuth      = "oauth"       // OAuth类型账号（full scope: profile + inference）
 	AccountTypeSetupToken = "setup-token" // Setup Token类型账号（inference only scope）
 	AccountTypeAPIKey     = "apikey"      // API Key类型账号
+	AccountTypeUpstream   = "upstream"    // 上游透传类型账号（通过 Base URL + API Key 连接上游）
 )

 // Redeem type constants
--- a/backend/internal/handler/admin/account_handler.go
+++ b/backend/internal/handler/admin/account_handler.go
@@ -84,7 +84,7 @@ type CreateAccountRequest struct {
 	Name                    string         `json:"name" binding:"required"`
 	Notes                   *string        `json:"notes"`
 	Platform                string         `json:"platform" binding:"required"`
-	Type                    string         `json:"type" binding:"required,oneof=oauth setup-token apikey"`
+	Type                    string         `json:"type" binding:"required,oneof=oauth setup-token apikey upstream"`
 	Credentials             map[string]any `json:"credentials" binding:"required"`
 	Extra                   map[string]any `json:"extra"`
 	ProxyID                 *int64         `json:"proxy_id"`
@@ -102,7 +102,7 @@ type CreateAccountRequest struct {
 type UpdateAccountRequest struct {
 	Name                    string         `json:"name"`
 	Notes                   *string        `json:"notes"`
-	Type                    string         `json:"type" binding:"omitempty,oneof=oauth setup-token apikey"`
+	Type                    string         `json:"type" binding:"omitempty,oneof=oauth setup-token apikey upstream"`
 	Credentials             map[string]any `json:"credentials"`
 	Extra                   map[string]any `json:"extra"`
 	ProxyID                 *int64         `json:"proxy_id"`
--- a/backend/internal/handler/admin/group_handler.go
+++ b/backend/internal/handler/admin/group_handler.go
@@ -35,14 +35,18 @@ type CreateGroupRequest struct {
 	WeeklyLimitUSD   *float64 `json:"weekly_limit_usd"`
 	MonthlyLimitUSD  *float64 `json:"monthly_limit_usd"`
 	// 图片生成计费配置（antigravity 和 gemini 平台使用，负数表示清除配置）
-	ImagePrice1K    *float64 `json:"image_price_1k"`
-	ImagePrice2K    *float64 `json:"image_price_2k"`
-	ImagePrice4K    *float64 `json:"image_price_4k"`
-	ClaudeCodeOnly  bool     `json:"claude_code_only"`
-	FallbackGroupID *int64   `json:"fallback_group_id"`
+	ImagePrice1K                    *float64 `json:"image_price_1k"`
+	ImagePrice2K                    *float64 `json:"image_price_2k"`
+	ImagePrice4K                    *float64 `json:"image_price_4k"`
+	ClaudeCodeOnly                  bool     `json:"claude_code_only"`
+	FallbackGroupID                 *int64   `json:"fallback_group_id"`
+	FallbackGroupIDOnInvalidRequest *int64   `json:"fallback_group_id_on_invalid_request"`
 	// 模型路由配置（仅 anthropic 平台使用）
 	ModelRouting        map[string][]int64 `json:"model_routing"`
 	ModelRoutingEnabled bool               `json:"model_routing_enabled"`
+	MCPXMLInject        *bool              `json:"mcp_xml_inject"`
+	// 支持的模型系列（仅 antigravity 平台使用）
+	SupportedModelScopes []string `json:"supported_model_scopes"`
 	// 从指定分组复制账号（创建后自动绑定）
 	CopyAccountsFromGroupIDs []int64 `json:"copy_accounts_from_group_ids"`
 }
@@ -60,14 +64,18 @@ type UpdateGroupRequest struct {
 	WeeklyLimitUSD   *float64 `json:"weekly_limit_usd"`
 	MonthlyLimitUSD  *float64 `json:"monthly_limit_usd"`
 	// 图片生成计费配置（antigravity 和 gemini 平台使用，负数表示清除配置）
-	ImagePrice1K    *float64 `json:"image_price_1k"`
-	ImagePrice2K    *float64 `json:"image_price_2k"`
-	ImagePrice4K    *float64 `json:"image_price_4k"`
-	ClaudeCodeOnly  *bool    `json:"claude_code_only"`
-	FallbackGroupID *int64   `json:"fallback_group_id"`
+	ImagePrice1K                    *float64 `json:"image_price_1k"`
+	ImagePrice2K                    *float64 `json:"image_price_2k"`
+	ImagePrice4K                    *float64 `json:"image_price_4k"`
+	ClaudeCodeOnly                  *bool    `json:"claude_code_only"`
+	FallbackGroupID                 *int64   `json:"fallback_group_id"`
+	FallbackGroupIDOnInvalidRequest *int64   `json:"fallback_group_id_on_invalid_request"`
 	// 模型路由配置（仅 anthropic 平台使用）
 	ModelRouting        map[string][]int64 `json:"model_routing"`
 	ModelRoutingEnabled *bool              `json:"model_routing_enabled"`
+	MCPXMLInject        *bool              `json:"mcp_xml_inject"`
+	// 支持的模型系列（仅 antigravity 平台使用）
+	SupportedModelScopes *[]string `json:"supported_model_scopes"`
 	// 从指定分组复制账号（同步操作：先清空当前分组的账号绑定，再绑定源分组的账号）
 	CopyAccountsFromGroupIDs []int64 `json:"copy_accounts_from_group_ids"`
 }
@@ -159,23 +167,26 @@ func (h *GroupHandler) Create(c *gin.Context) {
 	}

 	group, err := h.adminService.CreateGroup(c.Request.Context(), &service.CreateGroupInput{
-		Name:                     req.Name,
-		Description:              req.Description,
-		Platform:                 req.Platform,
-		RateMultiplier:           req.RateMultiplier,
-		IsExclusive:              req.IsExclusive,
-		SubscriptionType:         req.SubscriptionType,
-		DailyLimitUSD:            req.DailyLimitUSD,
-		WeeklyLimitUSD:           req.WeeklyLimitUSD,
-		MonthlyLimitUSD:          req.MonthlyLimitUSD,
-		ImagePrice1K:             req.ImagePrice1K,
-		ImagePrice2K:             req.ImagePrice2K,
-		ImagePrice4K:             req.ImagePrice4K,
-		ClaudeCodeOnly:           req.ClaudeCodeOnly,
-		FallbackGroupID:          req.FallbackGroupID,
-		ModelRouting:             req.ModelRouting,
-		ModelRoutingEnabled:      req.ModelRoutingEnabled,
-		CopyAccountsFromGroupIDs: req.CopyAccountsFromGroupIDs,
+		Name:                            req.Name,
+		Description:                     req.Description,
+		Platform:                        req.Platform,
+		RateMultiplier:                  req.RateMultiplier,
+		IsExclusive:                     req.IsExclusive,
+		SubscriptionType:                req.SubscriptionType,
+		DailyLimitUSD:                   req.DailyLimitUSD,
+		WeeklyLimitUSD:                  req.WeeklyLimitUSD,
+		MonthlyLimitUSD:                 req.MonthlyLimitUSD,
+		ImagePrice1K:                    req.ImagePrice1K,
+		ImagePrice2K:                    req.ImagePrice2K,
+		ImagePrice4K:                    req.ImagePrice4K,
+		ClaudeCodeOnly:                  req.ClaudeCodeOnly,
+		FallbackGroupID:                 req.FallbackGroupID,
+		FallbackGroupIDOnInvalidRequest: req.FallbackGroupIDOnInvalidRequest,
+		ModelRouting:                    req.ModelRouting,
+		ModelRoutingEnabled:             req.ModelRoutingEnabled,
+		MCPXMLInject:                    req.MCPXMLInject,
+		SupportedModelScopes:            req.SupportedModelScopes,
+		CopyAccountsFromGroupIDs:        req.CopyAccountsFromGroupIDs,
 	})
 	if err != nil {
 		response.ErrorFrom(c, err)
@@ -201,24 +212,27 @@ func (h *GroupHandler) Update(c *gin.Context) {
 	}

 	group, err := h.adminService.UpdateGroup(c.Request.Context(), groupID, &service.UpdateGroupInput{
-		Name:                     req.Name,
-		Description:              req.Description,
-		Platform:                 req.Platform,
-		RateMultiplier:           req.RateMultiplier,
-		IsExclusive:              req.IsExclusive,
-		Status:                   req.Status,
-		SubscriptionType:         req.SubscriptionType,
-		DailyLimitUSD:            req.DailyLimitUSD,
-		WeeklyLimitUSD:           req.WeeklyLimitUSD,
-		MonthlyLimitUSD:          req.MonthlyLimitUSD,
-		ImagePrice1K:             req.ImagePrice1K,
-		ImagePrice2K:             req.ImagePrice2K,
-		ImagePrice4K:             req.ImagePrice4K,
-		ClaudeCodeOnly:           req.ClaudeCodeOnly,
-		FallbackGroupID:          req.FallbackGroupID,
-		ModelRouting:             req.ModelRouting,
-		ModelRoutingEnabled:      req.ModelRoutingEnabled,
-		CopyAccountsFromGroupIDs: req.CopyAccountsFromGroupIDs,
+		Name:                            req.Name,
+		Description:                     req.Description,
+		Platform:                        req.Platform,
+		RateMultiplier:                  req.RateMultiplier,
+		IsExclusive:                     req.IsExclusive,
+		Status:                          req.Status,
+		SubscriptionType:                req.SubscriptionType,
+		DailyLimitUSD:                   req.DailyLimitUSD,
+		WeeklyLimitUSD:                  req.WeeklyLimitUSD,
+		MonthlyLimitUSD:                 req.MonthlyLimitUSD,
+		ImagePrice1K:                    req.ImagePrice1K,
+		ImagePrice2K:                    req.ImagePrice2K,
+		ImagePrice4K:                    req.ImagePrice4K,
+		ClaudeCodeOnly:                  req.ClaudeCodeOnly,
+		FallbackGroupID:                 req.FallbackGroupID,
+		FallbackGroupIDOnInvalidRequest: req.FallbackGroupIDOnInvalidRequest,
+		ModelRouting:                    req.ModelRouting,
+		ModelRoutingEnabled:             req.ModelRoutingEnabled,
+		MCPXMLInject:                    req.MCPXMLInject,
+		SupportedModelScopes:            req.SupportedModelScopes,
+		CopyAccountsFromGroupIDs:        req.CopyAccountsFromGroupIDs,
 	})
 	if err != nil {
 		response.ErrorFrom(c, err)
--- a/backend/internal/handler/api_key_handler.go
+++ b/backend/internal/handler/api_key_handler.go
@@ -3,6 +3,7 @@ package handler

 import (
 	"strconv"
+	"time"

 	"github.com/Wei-Shaw/sub2api/internal/handler/dto"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/pagination"
@@ -27,11 +28,13 @@ func NewAPIKeyHandler(apiKeyService *service.APIKeyService) *APIKeyHandler {

 // CreateAPIKeyRequest represents the create API key request payload
 type CreateAPIKeyRequest struct {
-	Name        string   `json:"name" binding:"required"`
-	GroupID     *int64   `json:"group_id"`     // nullable
-	CustomKey   *string  `json:"custom_key"`   // 可选的自定义key
-	IPWhitelist []string `json:"ip_whitelist"` // IP 白名单
-	IPBlacklist []string `json:"ip_blacklist"` // IP 黑名单
+	Name          string   `json:"name" binding:"required"`
+	GroupID       *int64   `json:"group_id"`        // nullable
+	CustomKey     *string  `json:"custom_key"`      // optional caller-supplied key
+	IPWhitelist   []string `json:"ip_whitelist"`    // IP whitelist
+	IPBlacklist   []string `json:"ip_blacklist"`    // IP blacklist
+	Quota         *float64 `json:"quota"`           // quota limit (USD)
+	ExpiresInDays *int     `json:"expires_in_days"` // number of days until expiration
 }

 // UpdateAPIKeyRequest represents the update API key request payload
@@ -41,6 +44,9 @@ type UpdateAPIKeyRequest struct {
 	Status      string   `json:"status" binding:"omitempty,oneof=active inactive"`
 	IPWhitelist []string `json:"ip_whitelist"` // IP 白名单
 	IPBlacklist []string `json:"ip_blacklist"` // IP 黑名单
+	Quota       *float64 `json:"quota"`        // 配额限制 (USD), 0=无限制
+	ExpiresAt   *string  `json:"expires_at"`   // 过期时间 (ISO 8601)
+	ResetQuota  *bool    `json:"reset_quota"`  // 重置已用配额
 }

 // List handles listing user's API keys with pagination
@@ -114,11 +120,15 @@ func (h *APIKeyHandler) Create(c *gin.Context) {
 	}

 	svcReq := service.CreateAPIKeyRequest{
-		Name:        req.Name,
-		GroupID:     req.GroupID,
-		CustomKey:   req.CustomKey,
-		IPWhitelist: req.IPWhitelist,
-		IPBlacklist: req.IPBlacklist,
+		Name:          req.Name,
+		GroupID:       req.GroupID,
+		CustomKey:     req.CustomKey,
+		IPWhitelist:   req.IPWhitelist,
+		IPBlacklist:   req.IPBlacklist,
+		ExpiresInDays: req.ExpiresInDays,
+	}
+	if req.Quota != nil {
+		svcReq.Quota = *req.Quota
 	}
 	key, err := h.apiKeyService.Create(c.Request.Context(), subject.UserID, svcReq)
 	if err != nil {
@@ -153,6 +163,8 @@ func (h *APIKeyHandler) Update(c *gin.Context) {
 	svcReq := service.UpdateAPIKeyRequest{
 		IPWhitelist: req.IPWhitelist,
 		IPBlacklist: req.IPBlacklist,
+		Quota:       req.Quota,
+		ResetQuota:  req.ResetQuota,
 	}
 	if req.Name != "" {
 		svcReq.Name = &req.Name
@@ -161,6 +173,21 @@ func (h *APIKeyHandler) Update(c *gin.Context) {
 	if req.Status != "" {
 		svcReq.Status = &req.Status
 	}
+	// Parse expires_at if provided
+	if req.ExpiresAt != nil {
+		if *req.ExpiresAt == "" {
+			// Empty string means clear expiration
+			svcReq.ExpiresAt = nil
+			svcReq.ClearExpiration = true
+		} else {
+			t, err := time.Parse(time.RFC3339, *req.ExpiresAt)
+			if err != nil {
+				response.BadRequest(c, "Invalid expires_at format: "+err.Error())
+				return
+			}
+			svcReq.ExpiresAt = &t
+		}
+	}

 	key, err := h.apiKeyService.Update(c.Request.Context(), keyID, subject.UserID, svcReq)
 	if err != nil {
--- a/backend/internal/handler/dto/mappers.go
+++ b/backend/internal/handler/dto/mappers.go
@@ -76,6 +76,9 @@ func APIKeyFromService(k *service.APIKey) *APIKey {
 		Status:      k.Status,
 		IPWhitelist: k.IPWhitelist,
 		IPBlacklist: k.IPBlacklist,
+		Quota:       k.Quota,
+		QuotaUsed:   k.QuotaUsed,
+		ExpiresAt:   k.ExpiresAt,
 		CreatedAt:   k.CreatedAt,
 		UpdatedAt:   k.UpdatedAt,
 		User:        UserFromServiceShallow(k.User),
@@ -105,10 +108,12 @@ func GroupFromServiceAdmin(g *service.Group) *AdminGroup {
 		return nil
 	}
 	out := &AdminGroup{
-		Group:               groupFromServiceBase(g),
-		ModelRouting:        g.ModelRouting,
-		ModelRoutingEnabled: g.ModelRoutingEnabled,
-		AccountCount:        g.AccountCount,
+		Group:                groupFromServiceBase(g),
+		ModelRouting:         g.ModelRouting,
+		ModelRoutingEnabled:  g.ModelRoutingEnabled,
+		MCPXMLInject:         g.MCPXMLInject,
+		SupportedModelScopes: g.SupportedModelScopes,
+		AccountCount:         g.AccountCount,
 	}
 	if len(g.AccountGroups) > 0 {
 		out.AccountGroups = make([]AccountGroup, 0, len(g.AccountGroups))
@@ -138,8 +143,10 @@ func groupFromServiceBase(g *service.Group) Group {
 		ImagePrice4K:     g.ImagePrice4K,
 		ClaudeCodeOnly:   g.ClaudeCodeOnly,
 		FallbackGroupID:  g.FallbackGroupID,
-		CreatedAt:        g.CreatedAt,
-		UpdatedAt:        g.UpdatedAt,
+		// 无效请求兜底分组
+		FallbackGroupIDOnInvalidRequest: g.FallbackGroupIDOnInvalidRequest,
+		CreatedAt:                       g.CreatedAt,
+		UpdatedAt:                       g.UpdatedAt,
 	}
 }

--- a/backend/internal/handler/dto/types.go
+++ b/backend/internal/handler/dto/types.go
@@ -32,16 +32,19 @@ type AdminUser struct {
 }

 type APIKey struct {
-	ID          int64     `json:"id"`
-	UserID      int64     `json:"user_id"`
-	Key         string    `json:"key"`
-	Name        string    `json:"name"`
-	GroupID     *int64    `json:"group_id"`
-	Status      string    `json:"status"`
-	IPWhitelist []string  `json:"ip_whitelist"`
-	IPBlacklist []string  `json:"ip_blacklist"`
-	CreatedAt   time.Time `json:"created_at"`
-	UpdatedAt   time.Time `json:"updated_at"`
+	ID          int64      `json:"id"`
+	UserID      int64      `json:"user_id"`
+	Key         string     `json:"key"`
+	Name        string     `json:"name"`
+	GroupID     *int64     `json:"group_id"`
+	Status      string     `json:"status"`
+	IPWhitelist []string   `json:"ip_whitelist"`
+	IPBlacklist []string   `json:"ip_blacklist"`
+	Quota       float64    `json:"quota"`      // Quota limit in USD (0 = unlimited)
+	QuotaUsed   float64    `json:"quota_used"` // Used quota amount in USD
+	ExpiresAt   *time.Time `json:"expires_at"` // Expiration time (nil = never expires)
+	CreatedAt   time.Time  `json:"created_at"`
+	UpdatedAt   time.Time  `json:"updated_at"`

 	User  *User  `json:"user,omitempty"`
 	Group *Group `json:"group,omitempty"`
@@ -69,6 +72,8 @@ type Group struct {
 	// Claude Code 客户端限制
 	ClaudeCodeOnly  bool   `json:"claude_code_only"`
 	FallbackGroupID *int64 `json:"fallback_group_id"`
+	// 无效请求兜底分组
+	FallbackGroupIDOnInvalidRequest *int64 `json:"fallback_group_id_on_invalid_request"`

 	CreatedAt time.Time `json:"created_at"`
 	UpdatedAt time.Time `json:"updated_at"`
@@ -83,8 +88,13 @@ type AdminGroup struct {
 	ModelRouting        map[string][]int64 `json:"model_routing"`
 	ModelRoutingEnabled bool               `json:"model_routing_enabled"`

-	AccountGroups []AccountGroup `json:"account_groups,omitempty"`
-	AccountCount  int64          `json:"account_count,omitempty"`
+	// MCP XML 协议注入（仅 antigravity 平台使用）
+	MCPXMLInject bool `json:"mcp_xml_inject"`
+
+	// 支持的模型系列（仅 antigravity 平台使用）
+	SupportedModelScopes []string       `json:"supported_model_scopes"`
+	AccountGroups        []AccountGroup `json:"account_groups,omitempty"`
+	AccountCount         int64          `json:"account_count,omitempty"`
 }

 type Account struct {
--- a/backend/internal/handler/gateway_handler.go
+++ b/backend/internal/handler/gateway_handler.go
@@ -14,6 +14,7 @@ import (
 	"github.com/Wei-Shaw/sub2api/internal/config"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/claude"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
 	pkgerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
@@ -31,6 +32,7 @@ type GatewayHandler struct {
 	userService               *service.UserService
 	billingCacheService       *service.BillingCacheService
 	usageService              *service.UsageService
+	apiKeyService             *service.APIKeyService
 	concurrencyHelper         *ConcurrencyHelper
 	maxAccountSwitches        int
 	maxAccountSwitchesGemini  int
@@ -45,6 +47,7 @@ func NewGatewayHandler(
 	concurrencyService *service.ConcurrencyService,
 	billingCacheService *service.BillingCacheService,
 	usageService *service.UsageService,
+	apiKeyService *service.APIKeyService,
 	cfg *config.Config,
 ) *GatewayHandler {
 	pingInterval := time.Duration(0)
@@ -66,6 +69,7 @@ func NewGatewayHandler(
 		userService:               userService,
 		billingCacheService:       billingCacheService,
 		usageService:              usageService,
+		apiKeyService:             apiKeyService,
 		concurrencyHelper:         NewConcurrencyHelper(concurrencyService, SSEPingFormatClaude, pingInterval),
 		maxAccountSwitches:        maxAccountSwitches,
 		maxAccountSwitchesGemini:  maxAccountSwitchesGemini,
@@ -281,10 +285,14 @@ func (h *GatewayHandler) Messages(c *gin.Context) {

 			// 转发请求 - 根据账号平台分流
 			var result *service.ForwardResult
+			requestCtx := c.Request.Context()
+			if switchCount > 0 {
+				requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, switchCount)
+			}
 			if account.Platform == service.PlatformAntigravity {
-				result, err = h.antigravityGatewayService.ForwardGemini(c.Request.Context(), c, account, reqModel, "generateContent", reqStream, body)
+				result, err = h.antigravityGatewayService.ForwardGemini(requestCtx, c, account, reqModel, "generateContent", reqStream, body)
 			} else {
-				result, err = h.geminiCompatService.Forward(c.Request.Context(), c, account, body)
+				result, err = h.geminiCompatService.Forward(requestCtx, c, account, body)
 			}
 			if accountReleaseFunc != nil {
 				accountReleaseFunc()
@@ -316,13 +324,14 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 				ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 				defer cancel()
 				if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
-					Result:       result,
-					APIKey:       apiKey,
-					User:         apiKey.User,
-					Account:      usedAccount,
-					Subscription: subscription,
-					UserAgent:    ua,
-					IPAddress:    clientIP,
+					Result:        result,
+					APIKey:        apiKey,
+					User:          apiKey.User,
+					Account:       usedAccount,
+					Subscription:  subscription,
+					UserAgent:     ua,
+					IPAddress:     clientIP,
+					APIKeyService: h.apiKeyService,
 				}); err != nil {
 					log.Printf("Record usage failed: %v", err)
 				}
@@ -331,139 +340,193 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 		}
 	}

-	maxAccountSwitches := h.maxAccountSwitches
-	switchCount := 0
-	failedAccountIDs := make(map[int64]struct{})
-	lastFailoverStatus := 0
+	currentAPIKey := apiKey
+	currentSubscription := subscription
+	var fallbackGroupID *int64
+	if apiKey.Group != nil {
+		fallbackGroupID = apiKey.Group.FallbackGroupIDOnInvalidRequest
+	}
+	fallbackUsed := false

 	for {
-		// 选择支持该模型的账号
-		selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs, parsedReq.MetadataUserID)
-		if err != nil {
-			if len(failedAccountIDs) == 0 {
-				h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
-				return
-			}
-			h.handleFailoverExhausted(c, lastFailoverStatus, streamStarted)
-			return
-		}
-		account := selection.Account
-		setOpsSelectedAccount(c, account.ID)
+		maxAccountSwitches := h.maxAccountSwitches
+		switchCount := 0
+		failedAccountIDs := make(map[int64]struct{})
+		lastFailoverStatus := 0
+		retryWithFallback := false

-		// 检查请求拦截（预热请求、SUGGESTION MODE等）
-		if account.IsInterceptWarmupEnabled() {
-			interceptType := detectInterceptType(body)
-			if interceptType != InterceptTypeNone {
-				if selection.Acquired && selection.ReleaseFunc != nil {
-					selection.ReleaseFunc()
-				}
-				if reqStream {
-					sendMockInterceptStream(c, reqModel, interceptType)
-				} else {
-					sendMockInterceptResponse(c, reqModel, interceptType)
-				}
-				return
-			}
-		}
-
-		// 3. 获取账号并发槽位
-		accountReleaseFunc := selection.ReleaseFunc
-		if !selection.Acquired {
-			if selection.WaitPlan == nil {
-				h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
-				return
-			}
-			accountWaitCounted := false
-			canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
+		for {
+			// 选择支持该模型的账号
+			selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), currentAPIKey.GroupID, sessionKey, reqModel, failedAccountIDs, parsedReq.MetadataUserID)
 			if err != nil {
-				log.Printf("Increment account wait count failed: %v", err)
-			} else if !canWait {
-				log.Printf("Account wait queue full: account=%d", account.ID)
-				h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
-				return
-			}
-			if err == nil && canWait {
-				accountWaitCounted = true
-			}
-			defer func() {
-				if accountWaitCounted {
-					h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
-				}
-			}()
-
-			accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
-				c,
-				account.ID,
-				selection.WaitPlan.MaxConcurrency,
-				selection.WaitPlan.Timeout,
-				reqStream,
-				&streamStarted,
-			)
-			if err != nil {
-				log.Printf("Account concurrency acquire failed: %v", err)
-				h.handleConcurrencyError(c, err, "account", streamStarted)
-				return
-			}
-			if accountWaitCounted {
-				h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
-				accountWaitCounted = false
-			}
-			if err := h.gatewayService.BindStickySession(c.Request.Context(), apiKey.GroupID, sessionKey, account.ID); err != nil {
-				log.Printf("Bind sticky session failed: %v", err)
-			}
-		}
-		// 账号槽位/等待计数需要在超时或断开时安全回收
-		accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)
-
-		// 转发请求 - 根据账号平台分流
-		var result *service.ForwardResult
-		if account.Platform == service.PlatformAntigravity {
-			result, err = h.antigravityGatewayService.Forward(c.Request.Context(), c, account, body)
-		} else {
-			result, err = h.gatewayService.Forward(c.Request.Context(), c, account, parsedReq)
-		}
-		if accountReleaseFunc != nil {
-			accountReleaseFunc()
-		}
-		if err != nil {
-			var failoverErr *service.UpstreamFailoverError
-			if errors.As(err, &failoverErr) {
-				failedAccountIDs[account.ID] = struct{}{}
-				lastFailoverStatus = failoverErr.StatusCode
-				if switchCount >= maxAccountSwitches {
-					h.handleFailoverExhausted(c, lastFailoverStatus, streamStarted)
+				if len(failedAccountIDs) == 0 {
+					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
 					return
 				}
-				switchCount++
-				log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
-				continue
+				h.handleFailoverExhausted(c, lastFailoverStatus, streamStarted)
+				return
 			}
-			// 错误响应已在Forward中处理，这里只记录日志
-			log.Printf("Account %d: Forward request failed: %v", account.ID, err)
+			account := selection.Account
+			setOpsSelectedAccount(c, account.ID)
+
+			// 检查请求拦截（预热请求、SUGGESTION MODE等）
+			if account.IsInterceptWarmupEnabled() {
+				interceptType := detectInterceptType(body)
+				if interceptType != InterceptTypeNone {
+					if selection.Acquired && selection.ReleaseFunc != nil {
+						selection.ReleaseFunc()
+					}
+					if reqStream {
+						sendMockInterceptStream(c, reqModel, interceptType)
+					} else {
+						sendMockInterceptResponse(c, reqModel, interceptType)
+					}
+					return
+				}
+			}
+
+			// 3. 获取账号并发槽位
+			accountReleaseFunc := selection.ReleaseFunc
+			if !selection.Acquired {
+				if selection.WaitPlan == nil {
+					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
+					return
+				}
+				accountWaitCounted := false
+				canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
+				if err != nil {
+					log.Printf("Increment account wait count failed: %v", err)
+				} else if !canWait {
+					log.Printf("Account wait queue full: account=%d", account.ID)
+					h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
+					return
+				}
+				if err == nil && canWait {
+					accountWaitCounted = true
+				}
+				defer func() {
+					if accountWaitCounted {
+						h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
+					}
+				}()
+
+				accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
+					c,
+					account.ID,
+					selection.WaitPlan.MaxConcurrency,
+					selection.WaitPlan.Timeout,
+					reqStream,
+					&streamStarted,
+				)
+				if err != nil {
+					log.Printf("Account concurrency acquire failed: %v", err)
+					h.handleConcurrencyError(c, err, "account", streamStarted)
+					return
+				}
+				if accountWaitCounted {
+					h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
+					accountWaitCounted = false
+				}
+				if err := h.gatewayService.BindStickySession(c.Request.Context(), currentAPIKey.GroupID, sessionKey, account.ID); err != nil {
+					log.Printf("Bind sticky session failed: %v", err)
+				}
+			}
+			// 账号槽位/等待计数需要在超时或断开时安全回收
+			accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)
+
+			// 转发请求 - 根据账号平台分流
+			var result *service.ForwardResult
+			requestCtx := c.Request.Context()
+			if switchCount > 0 {
+				requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, switchCount)
+			}
+			if account.Platform == service.PlatformAntigravity {
+				result, err = h.antigravityGatewayService.Forward(requestCtx, c, account, body)
+			} else {
+				result, err = h.gatewayService.Forward(requestCtx, c, account, parsedReq)
+			}
+			if accountReleaseFunc != nil {
+				accountReleaseFunc()
+			}
+			if err != nil {
+				var promptTooLongErr *service.PromptTooLongError
+				if errors.As(err, &promptTooLongErr) {
+					log.Printf("Prompt too long from antigravity: group=%d fallback_group_id=%v fallback_used=%v", currentAPIKey.GroupID, fallbackGroupID, fallbackUsed)
+					if !fallbackUsed && fallbackGroupID != nil && *fallbackGroupID > 0 {
+						fallbackGroup, err := h.gatewayService.ResolveGroupByID(c.Request.Context(), *fallbackGroupID)
+						if err != nil {
+							log.Printf("Resolve fallback group failed: %v", err)
+							_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
+							return
+						}
+						if fallbackGroup.Platform != service.PlatformAnthropic ||
+							fallbackGroup.SubscriptionType == service.SubscriptionTypeSubscription ||
+							fallbackGroup.FallbackGroupIDOnInvalidRequest != nil {
+							log.Printf("Fallback group invalid: group=%d platform=%s subscription=%s", fallbackGroup.ID, fallbackGroup.Platform, fallbackGroup.SubscriptionType)
+							_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
+							return
+						}
+						fallbackAPIKey := cloneAPIKeyWithGroup(apiKey, fallbackGroup)
+						if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), fallbackAPIKey.User, fallbackAPIKey, fallbackGroup, nil); err != nil {
+							status, code, message := billingErrorDetails(err)
+							h.handleStreamingAwareError(c, status, code, message, streamStarted)
+							return
+						}
+						// 兜底重试按“直接请求兜底分组”处理：清除强制平台，允许按分组平台调度
+						ctx := context.WithValue(c.Request.Context(), ctxkey.ForcePlatform, "")
+						c.Request = c.Request.WithContext(ctx)
+						currentAPIKey = fallbackAPIKey
+						currentSubscription = nil
+						fallbackUsed = true
+						retryWithFallback = true
+						break
+					}
+					_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
+					return
+				}
+				var failoverErr *service.UpstreamFailoverError
+				if errors.As(err, &failoverErr) {
+					failedAccountIDs[account.ID] = struct{}{}
+					lastFailoverStatus = failoverErr.StatusCode
+					if switchCount >= maxAccountSwitches {
+						h.handleFailoverExhausted(c, lastFailoverStatus, streamStarted)
+						return
+					}
+					switchCount++
+					log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
+					continue
+				}
+				// 错误响应已在Forward中处理，这里只记录日志
+				log.Printf("Account %d: Forward request failed: %v", account.ID, err)
+				return
+			}
+
+			// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
+			userAgent := c.GetHeader("User-Agent")
+			clientIP := ip.GetClientIP(c)
+
+			// 异步记录使用量（subscription已在函数开头获取）
+			go func(result *service.ForwardResult, usedAccount *service.Account, ua, clientIP string) {
+				ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+				defer cancel()
+				if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
+					Result:        result,
+					APIKey:        currentAPIKey,
+					User:          currentAPIKey.User,
+					Account:       usedAccount,
+					Subscription:  currentSubscription,
+					UserAgent:     ua,
+					IPAddress:     clientIP,
+					APIKeyService: h.apiKeyService,
+				}); err != nil {
+					log.Printf("Record usage failed: %v", err)
+				}
+			}(result, account, userAgent, clientIP)
+			return
+		}
+		if !retryWithFallback {
 			return
 		}
-
-		// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
-		userAgent := c.GetHeader("User-Agent")
-		clientIP := ip.GetClientIP(c)
-
-		// 异步记录使用量（subscription已在函数开头获取）
-		go func(result *service.ForwardResult, usedAccount *service.Account, ua, clientIP string) {
-			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-			defer cancel()
-			if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
-				Result:       result,
-				APIKey:       apiKey,
-				User:         apiKey.User,
-				Account:      usedAccount,
-				Subscription: subscription,
-				UserAgent:    ua,
-				IPAddress:    clientIP,
-			}); err != nil {
-				log.Printf("Record usage failed: %v", err)
-			}
-		}(result, account, userAgent, clientIP)
-		return
 	}
 }

@@ -527,6 +590,17 @@ func (h *GatewayHandler) AntigravityModels(c *gin.Context) {
 	})
 }

+// cloneAPIKeyWithGroup returns a shallow copy of apiKey whose GroupID and Group
+// point at group; if either argument is nil, apiKey itself is returned as is.
+func cloneAPIKeyWithGroup(apiKey *service.APIKey, group *service.Group) *service.APIKey {
+	if apiKey == nil || group == nil {
+		return apiKey
+	}
+	rebound, id := *apiKey, group.ID
+	rebound.GroupID, rebound.Group = &id, group
+	return &rebound
+}
+
 // Usage handles getting account balance and usage statistics for CC Switch integration
 // GET /v1/usage
 func (h *GatewayHandler) Usage(c *gin.Context) {
--- a/backend/internal/handler/gemini_v1beta_handler.go
+++ b/backend/internal/handler/gemini_v1beta_handler.go
@@ -14,6 +14,7 @@ import (
 	"time"

 	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/gemini"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/googleapi"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
@@ -335,10 +336,14 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {

 		// 5) forward (根据平台分流)
 		var result *service.ForwardResult
+		requestCtx := c.Request.Context()
+		if switchCount > 0 {
+			requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, switchCount)
+		}
 		if account.Platform == service.PlatformAntigravity {
-			result, err = h.antigravityGatewayService.ForwardGemini(c.Request.Context(), c, account, modelName, action, stream, body)
+			result, err = h.antigravityGatewayService.ForwardGemini(requestCtx, c, account, modelName, action, stream, body)
 		} else {
-			result, err = h.geminiCompatService.ForwardNative(c.Request.Context(), c, account, modelName, action, stream, body)
+			result, err = h.geminiCompatService.ForwardNative(requestCtx, c, account, modelName, action, stream, body)
 		}
 		if accountReleaseFunc != nil {
 			accountReleaseFunc()
@@ -381,6 +386,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 				IPAddress:             ip,
 				LongContextThreshold:  200000, // Gemini 200K 阈值
 				LongContextMultiplier: 2.0,    // 超出部分双倍计费
+				APIKeyService:         h.apiKeyService,
 			}); err != nil {
 				log.Printf("Record usage failed: %v", err)
 			}
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -24,6 +24,7 @@ import (
 type OpenAIGatewayHandler struct {
 	gatewayService      *service.OpenAIGatewayService
 	billingCacheService *service.BillingCacheService
+	apiKeyService       *service.APIKeyService
 	concurrencyHelper   *ConcurrencyHelper
 	maxAccountSwitches  int
 }
@@ -33,6 +34,7 @@ func NewOpenAIGatewayHandler(
 	gatewayService *service.OpenAIGatewayService,
 	concurrencyService *service.ConcurrencyService,
 	billingCacheService *service.BillingCacheService,
+	apiKeyService *service.APIKeyService,
 	cfg *config.Config,
 ) *OpenAIGatewayHandler {
 	pingInterval := time.Duration(0)
@@ -46,6 +48,7 @@ func NewOpenAIGatewayHandler(
 	return &OpenAIGatewayHandler{
 		gatewayService:      gatewayService,
 		billingCacheService: billingCacheService,
+		apiKeyService:       apiKeyService,
 		concurrencyHelper:   NewConcurrencyHelper(concurrencyService, SSEPingFormatComment, pingInterval),
 		maxAccountSwitches:  maxAccountSwitches,
 	}
@@ -299,13 +302,14 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 			defer cancel()
 			if err := h.gatewayService.RecordUsage(ctx, &service.OpenAIRecordUsageInput{
-				Result:       result,
-				APIKey:       apiKey,
-				User:         apiKey.User,
-				Account:      usedAccount,
-				Subscription: subscription,
-				UserAgent:    ua,
-				IPAddress:    ip,
+				Result:        result,
+				APIKey:        apiKey,
+				User:          apiKey.User,
+				Account:       usedAccount,
+				Subscription:  subscription,
+				UserAgent:     ua,
+				IPAddress:     ip,
+				APIKeyService: h.apiKeyService,
 			}); err != nil {
 				log.Printf("Record usage failed: %v", err)
 			}
--- a/backend/internal/pkg/antigravity/oauth.go
+++ b/backend/internal/pkg/antigravity/oauth.go
@@ -40,17 +40,48 @@ const (

 	// URL 可用性 TTL（不可用 URL 的恢复时间）
 	URLAvailabilityTTL = 5 * time.Minute
+
+	// Antigravity API 端点
+	antigravityProdBaseURL  = "https://cloudcode-pa.googleapis.com"
+	antigravityDailyBaseURL = "https://daily-cloudcode-pa.sandbox.googleapis.com"
 )

 // BaseURLs 定义 Antigravity API 端点（与 Antigravity-Manager 保持一致）
 var BaseURLs = []string{
-	"https://cloudcode-pa.googleapis.com",               // prod (优先)
-	"https://daily-cloudcode-pa.sandbox.googleapis.com", // daily sandbox (备用)
+	antigravityProdBaseURL,  // prod (优先)
+	antigravityDailyBaseURL, // daily sandbox (备用)
 }

 // BaseURL 默认 URL（保持向后兼容）
 var BaseURL = BaseURLs[0]

+// ForwardBaseURLs returns a copy of BaseURLs ordered for API forwarding: the
+// daily sandbox endpoint is moved to the front and the remaining endpoints keep
+// their relative order. It returns nil when BaseURLs is empty, and an unchanged
+// copy when the daily endpoint is absent or already first.
+func ForwardBaseURLs() []string {
+	total := len(BaseURLs)
+	if total == 0 {
+		return nil
+	}
+
+	// Locate the first occurrence of the daily endpoint.
+	dailyPos := -1
+	for pos := 0; pos < total && dailyPos < 0; pos++ {
+		if BaseURLs[pos] == antigravityDailyBaseURL {
+			dailyPos = pos
+		}
+	}
+	if dailyPos <= 0 {
+		return append([]string(nil), BaseURLs...)
+	}
+
+	ordered := make([]string, 0, total)
+	ordered = append(ordered, BaseURLs[dailyPos])
+	ordered = append(ordered, BaseURLs[:dailyPos]...)
+	return append(ordered, BaseURLs[dailyPos+1:]...)
+}
+
 // URLAvailability 管理 URL 可用性状态（带 TTL 自动恢复和动态优先级）
 type URLAvailability struct {
 	mu          sync.RWMutex
@@ -100,22 +131,37 @@ func (u *URLAvailability) IsAvailable(url string) bool {
 // GetAvailableURLs 返回可用的 URL 列表
 // 最近成功的 URL 优先，其他按默认顺序
 func (u *URLAvailability) GetAvailableURLs() []string {
+	return u.GetAvailableURLsWithBase(BaseURLs)
+}
+
+// GetAvailableURLsWithBase 返回可用的 URL 列表（使用自定义顺序）
+// 最近成功的 URL 优先，其他按传入顺序
+func (u *URLAvailability) GetAvailableURLsWithBase(baseURLs []string) []string {
 	u.mu.RLock()
 	defer u.mu.RUnlock()

 	now := time.Now()
-	result := make([]string, 0, len(BaseURLs))
+	result := make([]string, 0, len(baseURLs))

 	// 如果有最近成功的 URL 且可用，放在最前面
 	if u.lastSuccess != "" {
-		expiry, exists := u.unavailable[u.lastSuccess]
-		if !exists || now.After(expiry) {
-			result = append(result, u.lastSuccess)
+		found := false
+		for _, url := range baseURLs {
+			if url == u.lastSuccess {
+				found = true
+				break
+			}
+		}
+		if found {
+			expiry, exists := u.unavailable[u.lastSuccess]
+			if !exists || now.After(expiry) {
+				result = append(result, u.lastSuccess)
+			}
 		}
 	}

-	// 添加其他可用的 URL（按默认顺序）
-	for _, url := range BaseURLs {
+	// 添加其他可用的 URL（按传入顺序）
+	for _, url := range baseURLs {
 		// 跳过已添加的 lastSuccess
 		if url == u.lastSuccess {
 			continue
--- a/backend/internal/pkg/antigravity/request_transformer.go
+++ b/backend/internal/pkg/antigravity/request_transformer.go
@@ -44,11 +44,13 @@ type TransformOptions struct {
 	// IdentityPatch 可选：自定义注入到 systemInstruction 开头的身份防护提示词；
 	// 为空时使用默认模板（包含 [IDENTITY_PATCH] 及 SYSTEM_PROMPT_BEGIN 标记）。
 	IdentityPatch string
+	EnableMCPXML  bool
 }

 func DefaultTransformOptions() TransformOptions {
 	return TransformOptions{
 		EnableIdentityPatch: true,
+		EnableMCPXML:        true,
 	}
 }

@@ -257,8 +259,8 @@ func buildSystemInstruction(system json.RawMessage, modelName string, opts Trans
 	// 添加用户的 system prompt
 	parts = append(parts, userSystemParts...)

-	// 检测是否有 MCP 工具，如有则注入 XML 调用协议
-	if hasMCPTools(tools) {
+	// 检测是否有 MCP 工具，如有且启用了 MCP XML 注入则注入 XML 调用协议
+	if opts.EnableMCPXML && hasMCPTools(tools) {
 		parts = append(parts, GeminiPart{Text: mcpXMLProtocol})
 	}

@@ -312,7 +314,7 @@ func buildContents(messages []ClaudeMessage, toolIDToName map[string]string, isT
 				parts = append([]GeminiPart{{
 					Text:             "Thinking...",
 					Thought:          true,
-					ThoughtSignature: dummyThoughtSignature,
+					ThoughtSignature: DummyThoughtSignature,
 				}}, parts...)
 			}
 		}
@@ -330,9 +332,10 @@ func buildContents(messages []ClaudeMessage, toolIDToName map[string]string, isT
 	return contents, strippedThinking, nil
 }

-// dummyThoughtSignature 用于跳过 Gemini 3 thought_signature 验证
+// DummyThoughtSignature 用于跳过 Gemini 3 thought_signature 验证
 // 参考: https://ai.google.dev/gemini-api/docs/thought-signatures
-const dummyThoughtSignature = "skip_thought_signature_validator"
+// 导出供跨包使用（如 gemini_native_signature_cleaner 跨账号修复）
+const DummyThoughtSignature = "skip_thought_signature_validator"

 // buildParts 构建消息的 parts
 // allowDummyThought: 只有 Gemini 模型支持 dummy thought signature
@@ -370,7 +373,7 @@ func buildParts(content json.RawMessage, toolIDToName map[string]string, allowDu
 			// signature 处理：
 			// - Claude 模型（allowDummyThought=false）：必须是上游返回的真实 signature（dummy 视为缺失）
 			// - Gemini 模型（allowDummyThought=true）：优先透传真实 signature，缺失时使用 dummy signature
-			if block.Signature != "" && (allowDummyThought || block.Signature != dummyThoughtSignature) {
+			if block.Signature != "" && (allowDummyThought || block.Signature != DummyThoughtSignature) {
 				part.ThoughtSignature = block.Signature
 			} else if !allowDummyThought {
 				// Claude 模型需要有效 signature；在缺失时降级为普通文本，并在上层禁用 thinking mode。
@@ -381,7 +384,7 @@ func buildParts(content json.RawMessage, toolIDToName map[string]string, allowDu
 				continue
 			} else {
 				// Gemini 模型使用 dummy signature
-				part.ThoughtSignature = dummyThoughtSignature
+				part.ThoughtSignature = DummyThoughtSignature
 			}
 			parts = append(parts, part)

@@ -411,10 +414,10 @@ func buildParts(content json.RawMessage, toolIDToName map[string]string, allowDu
 			// tool_use 的 signature 处理：
 			// - Claude 模型（allowDummyThought=false）：必须是上游返回的真实 signature（dummy 视为缺失）
 			// - Gemini 模型（allowDummyThought=true）：优先透传真实 signature，缺失时使用 dummy signature
-			if block.Signature != "" && (allowDummyThought || block.Signature != dummyThoughtSignature) {
+			if block.Signature != "" && (allowDummyThought || block.Signature != DummyThoughtSignature) {
 				part.ThoughtSignature = block.Signature
 			} else if allowDummyThought {
-				part.ThoughtSignature = dummyThoughtSignature
+				part.ThoughtSignature = DummyThoughtSignature
 			}
 			parts = append(parts, part)

@@ -492,9 +495,23 @@ func parseToolResultContent(content json.RawMessage, isError bool) string {
 }

 // buildGenerationConfig 构建 generationConfig
+const (
+	defaultMaxOutputTokens    = 64000
+	maxOutputTokensUpperBound = 65000
+	maxOutputTokensClaude     = 64000
+)
+
+func maxOutputTokensLimit(model string) int {
+	if strings.HasPrefix(model, "claude-") {
+		return maxOutputTokensClaude
+	}
+	return maxOutputTokensUpperBound
+}
+
 func buildGenerationConfig(req *ClaudeRequest) *GeminiGenerationConfig {
+	maxLimit := maxOutputTokensLimit(req.Model)
 	config := &GeminiGenerationConfig{
-		MaxOutputTokens: 64000, // 默认最大输出
+		MaxOutputTokens: defaultMaxOutputTokens, // 默认最大输出
 		StopSequences:   DefaultStopSequences,
 	}

@@ -518,6 +535,10 @@ func buildGenerationConfig(req *ClaudeRequest) *GeminiGenerationConfig {
 		}
 	}

+	if config.MaxOutputTokens > maxLimit {
+		config.MaxOutputTokens = maxLimit
+	}
+
 	// 其他参数
 	if req.Temperature != nil {
 		config.Temperature = req.Temperature
--- a/backend/internal/pkg/antigravity/request_transformer_test.go
+++ b/backend/internal/pkg/antigravity/request_transformer_test.go
@@ -86,7 +86,7 @@ func TestBuildParts_ThinkingBlockWithoutSignature(t *testing.T) {
 				if len(parts) != 3 {
 					t.Fatalf("expected 3 parts, got %d", len(parts))
 				}
-				if !parts[1].Thought || parts[1].ThoughtSignature != dummyThoughtSignature {
+				if !parts[1].Thought || parts[1].ThoughtSignature != DummyThoughtSignature {
 					t.Fatalf("expected dummy thought signature, got thought=%v signature=%q",
 						parts[1].Thought, parts[1].ThoughtSignature)
 				}
@@ -126,8 +126,8 @@ func TestBuildParts_ToolUseSignatureHandling(t *testing.T) {
 		if len(parts) != 1 || parts[0].FunctionCall == nil {
 			t.Fatalf("expected 1 functionCall part, got %+v", parts)
 		}
-		if parts[0].ThoughtSignature != dummyThoughtSignature {
-			t.Fatalf("expected dummy tool signature %q, got %q", dummyThoughtSignature, parts[0].ThoughtSignature)
+		if parts[0].ThoughtSignature != DummyThoughtSignature {
+			t.Fatalf("expected dummy tool signature %q, got %q", DummyThoughtSignature, parts[0].ThoughtSignature)
 		}
 	})

--- a/backend/internal/pkg/ctxkey/ctxkey.go
+++ b/backend/internal/pkg/ctxkey/ctxkey.go
@@ -14,6 +14,9 @@ const (
 	// RetryCount 表示当前请求在网关层的重试次数（用于 Ops 记录与排障）。
 	RetryCount Key = "ctx_retry_count"

+	// AccountSwitchCount 表示请求过程中发生的账号切换次数
+	AccountSwitchCount Key = "ctx_account_switch_count"
+
 	// IsClaudeCodeClient 标识当前请求是否来自 Claude Code 客户端
 	IsClaudeCodeClient Key = "ctx_is_claude_code_client"
 	// Group 认证后的分组信息，由 API Key 认证中间件设置
--- a/backend/internal/repository/api_key_repo.go
+++ b/backend/internal/repository/api_key_repo.go
@@ -33,7 +33,10 @@ func (r *apiKeyRepository) Create(ctx context.Context, key *service.APIKey) erro
 		SetKey(key.Key).
 		SetName(key.Name).
 		SetStatus(key.Status).
-		SetNillableGroupID(key.GroupID)
+		SetNillableGroupID(key.GroupID).
+		SetQuota(key.Quota).
+		SetQuotaUsed(key.QuotaUsed).
+		SetNillableExpiresAt(key.ExpiresAt)

 	if len(key.IPWhitelist) > 0 {
 		builder.SetIPWhitelist(key.IPWhitelist)
@@ -110,6 +113,9 @@ func (r *apiKeyRepository) GetByKeyForAuth(ctx context.Context, key string) (*se
 			apikey.FieldStatus,
 			apikey.FieldIPWhitelist,
 			apikey.FieldIPBlacklist,
+			apikey.FieldQuota,
+			apikey.FieldQuotaUsed,
+			apikey.FieldExpiresAt,
 		).
 		WithUser(func(q *dbent.UserQuery) {
 			q.Select(
@@ -136,8 +142,11 @@ func (r *apiKeyRepository) GetByKeyForAuth(ctx context.Context, key string) (*se
 				group.FieldImagePrice4k,
 				group.FieldClaudeCodeOnly,
 				group.FieldFallbackGroupID,
+				group.FieldFallbackGroupIDOnInvalidRequest,
 				group.FieldModelRoutingEnabled,
 				group.FieldModelRouting,
+				group.FieldMcpXMLInject,
+				group.FieldSupportedModelScopes,
 			)
 		}).
 		Only(ctx)
@@ -161,6 +170,8 @@ func (r *apiKeyRepository) Update(ctx context.Context, key *service.APIKey) erro
 		Where(apikey.IDEQ(key.ID), apikey.DeletedAtIsNil()).
 		SetName(key.Name).
 		SetStatus(key.Status).
+		SetQuota(key.Quota).
+		SetQuotaUsed(key.QuotaUsed).
 		SetUpdatedAt(now)
 	if key.GroupID != nil {
 		builder.SetGroupID(*key.GroupID)
@@ -168,6 +179,13 @@ func (r *apiKeyRepository) Update(ctx context.Context, key *service.APIKey) erro
 		builder.ClearGroupID()
 	}

+	// Expiration time
+	if key.ExpiresAt != nil {
+		builder.SetExpiresAt(*key.ExpiresAt)
+	} else {
+		builder.ClearExpiresAt()
+	}
+
 	// IP 限制字段
 	if len(key.IPWhitelist) > 0 {
 		builder.SetIPWhitelist(key.IPWhitelist)
@@ -357,6 +375,38 @@ func (r *apiKeyRepository) ListKeysByGroupID(ctx context.Context, groupID int64)
 	return keys, nil
 }

+// IncrementQuotaUsed atomically adds amount to the key's quota_used column and
+// returns the value stored afterwards.
+// The increment is a single UPDATE built with ent's AddQuotaUsed (quota_used =
+// quota_used + amount), so concurrent calls for the same key cannot overwrite
+// each other the way a separate read followed by SetQuotaUsed could.
+// It returns service.ErrAPIKeyNotFound when no non-deleted row matches id.
+func (r *apiKeyRepository) IncrementQuotaUsed(ctx context.Context, id int64, amount float64) (float64, error) {
+	affected, err := r.client.APIKey.Update().
+		Where(apikey.IDEQ(id), apikey.DeletedAtIsNil()).
+		AddQuotaUsed(amount).
+		Save(ctx)
+	if err != nil {
+		return 0, err
+	}
+	if affected == 0 {
+		return 0, service.ErrAPIKeyNotFound
+	}
+
+	// Read the total back; it may already include concurrent increments.
+	m, err := r.activeQuery().
+		Where(apikey.IDEQ(id)).
+		Select(apikey.FieldQuotaUsed).
+		Only(ctx)
+	if err != nil {
+		if dbent.IsNotFound(err) {
+			return 0, service.ErrAPIKeyNotFound
+		}
+		return 0, err
+	}
+	return m.QuotaUsed, nil
+}
+
 func apiKeyEntityToService(m *dbent.APIKey) *service.APIKey {
 	if m == nil {
 		return nil
@@ -372,6 +422,9 @@ func apiKeyEntityToService(m *dbent.APIKey) *service.APIKey {
 		CreatedAt:   m.CreatedAt,
 		UpdatedAt:   m.UpdatedAt,
 		GroupID:     m.GroupID,
+		Quota:       m.Quota,
+		QuotaUsed:   m.QuotaUsed,
+		ExpiresAt:   m.ExpiresAt,
 	}
 	if m.Edges.User != nil {
 		out.User = userEntityToService(m.Edges.User)
@@ -409,28 +462,31 @@ func groupEntityToService(g *dbent.Group) *service.Group {
 		return nil
 	}
 	return &service.Group{
-		ID:                  g.ID,
-		Name:                g.Name,
-		Description:         derefString(g.Description),
-		Platform:            g.Platform,
-		RateMultiplier:      g.RateMultiplier,
-		IsExclusive:         g.IsExclusive,
-		Status:              g.Status,
-		Hydrated:            true,
-		SubscriptionType:    g.SubscriptionType,
-		DailyLimitUSD:       g.DailyLimitUsd,
-		WeeklyLimitUSD:      g.WeeklyLimitUsd,
-		MonthlyLimitUSD:     g.MonthlyLimitUsd,
-		ImagePrice1K:        g.ImagePrice1k,
-		ImagePrice2K:        g.ImagePrice2k,
-		ImagePrice4K:        g.ImagePrice4k,
-		DefaultValidityDays: g.DefaultValidityDays,
-		ClaudeCodeOnly:      g.ClaudeCodeOnly,
-		FallbackGroupID:     g.FallbackGroupID,
-		ModelRouting:        g.ModelRouting,
-		ModelRoutingEnabled: g.ModelRoutingEnabled,
-		CreatedAt:           g.CreatedAt,
-		UpdatedAt:           g.UpdatedAt,
+		ID:                              g.ID,
+		Name:                            g.Name,
+		Description:                     derefString(g.Description),
+		Platform:                        g.Platform,
+		RateMultiplier:                  g.RateMultiplier,
+		IsExclusive:                     g.IsExclusive,
+		Status:                          g.Status,
+		Hydrated:                        true,
+		SubscriptionType:                g.SubscriptionType,
+		DailyLimitUSD:                   g.DailyLimitUsd,
+		WeeklyLimitUSD:                  g.WeeklyLimitUsd,
+		MonthlyLimitUSD:                 g.MonthlyLimitUsd,
+		ImagePrice1K:                    g.ImagePrice1k,
+		ImagePrice2K:                    g.ImagePrice2k,
+		ImagePrice4K:                    g.ImagePrice4k,
+		DefaultValidityDays:             g.DefaultValidityDays,
+		ClaudeCodeOnly:                  g.ClaudeCodeOnly,
+		FallbackGroupID:                 g.FallbackGroupID,
+		FallbackGroupIDOnInvalidRequest: g.FallbackGroupIDOnInvalidRequest,
+		ModelRouting:                    g.ModelRouting,
+		ModelRoutingEnabled:             g.ModelRoutingEnabled,
+		MCPXMLInject:                    g.McpXMLInject,
+		SupportedModelScopes:            g.SupportedModelScopes,
+		CreatedAt:                       g.CreatedAt,
+		UpdatedAt:                       g.UpdatedAt,
 	}
 }

--- a/backend/internal/repository/group_repo.go
+++ b/backend/internal/repository/group_repo.go
@@ -50,13 +50,18 @@ func (r *groupRepository) Create(ctx context.Context, groupIn *service.Group) er
 		SetDefaultValidityDays(groupIn.DefaultValidityDays).
 		SetClaudeCodeOnly(groupIn.ClaudeCodeOnly).
 		SetNillableFallbackGroupID(groupIn.FallbackGroupID).
-		SetModelRoutingEnabled(groupIn.ModelRoutingEnabled)
+		SetNillableFallbackGroupIDOnInvalidRequest(groupIn.FallbackGroupIDOnInvalidRequest).
+		SetModelRoutingEnabled(groupIn.ModelRoutingEnabled).
+		SetMcpXMLInject(groupIn.MCPXMLInject)

 	// 设置模型路由配置
 	if groupIn.ModelRouting != nil {
 		builder = builder.SetModelRouting(groupIn.ModelRouting)
 	}

+	// 设置支持的模型系列（始终设置，空数组表示不限制）
+	builder = builder.SetSupportedModelScopes(groupIn.SupportedModelScopes)
+
 	created, err := builder.Save(ctx)
 	if err == nil {
 		groupIn.ID = created.ID
@@ -87,7 +92,6 @@ func (r *groupRepository) GetByIDLite(ctx context.Context, id int64) (*service.G
 	if err != nil {
 		return nil, translatePersistenceError(err, service.ErrGroupNotFound, nil)
 	}
-
 	return groupEntityToService(m), nil
 }

@@ -108,7 +112,8 @@ func (r *groupRepository) Update(ctx context.Context, groupIn *service.Group) er
 		SetNillableImagePrice4k(groupIn.ImagePrice4K).
 		SetDefaultValidityDays(groupIn.DefaultValidityDays).
 		SetClaudeCodeOnly(groupIn.ClaudeCodeOnly).
-		SetModelRoutingEnabled(groupIn.ModelRoutingEnabled)
+		SetModelRoutingEnabled(groupIn.ModelRoutingEnabled).
+		SetMcpXMLInject(groupIn.MCPXMLInject)

 	// 处理 FallbackGroupID：nil 时清除，否则设置
 	if groupIn.FallbackGroupID != nil {
@@ -116,6 +121,12 @@ func (r *groupRepository) Update(ctx context.Context, groupIn *service.Group) er
 	} else {
 		builder = builder.ClearFallbackGroupID()
 	}
+	// 处理 FallbackGroupIDOnInvalidRequest：nil 时清除，否则设置
+	if groupIn.FallbackGroupIDOnInvalidRequest != nil {
+		builder = builder.SetFallbackGroupIDOnInvalidRequest(*groupIn.FallbackGroupIDOnInvalidRequest)
+	} else {
+		builder = builder.ClearFallbackGroupIDOnInvalidRequest()
+	}

 	// 处理 ModelRouting：nil 时清除，否则设置
 	if groupIn.ModelRouting != nil {
@@ -124,6 +135,9 @@ func (r *groupRepository) Update(ctx context.Context, groupIn *service.Group) er
 		builder = builder.ClearModelRouting()
 	}

+	// 处理 SupportedModelScopes（始终设置，空数组表示不限制）
+	builder = builder.SetSupportedModelScopes(groupIn.SupportedModelScopes)
+
 	updated, err := builder.Save(ctx)
 	if err != nil {
 		return translatePersistenceError(err, service.ErrGroupNotFound, service.ErrGroupExists)
--- a/backend/internal/repository/ops_repo_metrics.go
+++ b/backend/internal/repository/ops_repo_metrics.go
@@ -43,6 +43,7 @@ INSERT INTO ops_system_metrics (
  upstream_529_count,

  token_consumed,
+  account_switch_count,
  qps,
  tps,

@@ -81,14 +82,14 @@ INSERT INTO ops_system_metrics (
  $1,$2,$3,$4,
  $5,$6,$7,$8,
  $9,$10,$11,
-  $12,$13,$14,
-  $15,$16,$17,$18,$19,$20,
-  $21,$22,$23,$24,$25,$26,
-  $27,$28,$29,$30,
-  $31,$32,
-  $33,$34,
-  $35,$36,$37,
-  $38,$39
+  $12,$13,$14,$15,
+  $16,$17,$18,$19,$20,$21,
+  $22,$23,$24,$25,$26,$27,
+  $28,$29,$30,$31,
+  $32,$33,
+  $34,$35,
+  $36,$37,$38,
+  $39,$40
 )`

 	_, err := r.db.ExecContext(
@@ -109,6 +110,7 @@ INSERT INTO ops_system_metrics (
 		input.Upstream529Count,

 		input.TokenConsumed,
+		input.AccountSwitchCount,
 		opsNullFloat64(input.QPS),
 		opsNullFloat64(input.TPS),

@@ -177,7 +179,8 @@ SELECT
  db_conn_waiting,

  goroutine_count,
-  concurrency_queue_depth
+  concurrency_queue_depth,
+  account_switch_count
 FROM ops_system_metrics
 WHERE window_minutes = $1
  AND platform IS NULL
@@ -199,6 +202,7 @@ LIMIT 1`
 	var dbWaiting sql.NullInt64
 	var goroutines sql.NullInt64
 	var queueDepth sql.NullInt64
+	var accountSwitchCount sql.NullInt64

 	if err := r.db.QueryRowContext(ctx, q, windowMinutes).Scan(
 		&out.ID,
@@ -217,6 +221,7 @@ LIMIT 1`
 		&dbWaiting,
 		&goroutines,
 		&queueDepth,
+		&accountSwitchCount,
 	); err != nil {
 		return nil, err
 	}
@@ -273,6 +278,10 @@ LIMIT 1`
 		v := int(queueDepth.Int64)
 		out.ConcurrencyQueueDepth = &v
 	}
+	if accountSwitchCount.Valid {
+		v := accountSwitchCount.Int64
+		out.AccountSwitchCount = &v
+	}

 	return &out, nil
 }
--- a/backend/internal/repository/ops_repo_trends.go
+++ b/backend/internal/repository/ops_repo_trends.go
@@ -56,18 +56,44 @@ error_buckets AS (
    AND COALESCE(status_code, 0) >= 400
  GROUP BY 1
 ),
+switch_buckets AS (
+  SELECT ` + errorBucketExpr + ` AS bucket,
+         COALESCE(SUM(CASE
+           WHEN split_part(ev->>'kind', ':', 1) IN ('failover', 'retry_exhausted_failover', 'failover_on_400') THEN 1
+           ELSE 0
+         END), 0) AS switch_count
+  FROM ops_error_logs
+  CROSS JOIN LATERAL jsonb_array_elements(
+    COALESCE(NULLIF(upstream_errors, 'null'::jsonb), '[]'::jsonb)
+  ) AS ev
+  ` + errorWhere + `
+    AND upstream_errors IS NOT NULL
+  GROUP BY 1
+),
 combined AS (
-  SELECT COALESCE(u.bucket, e.bucket) AS bucket,
-         COALESCE(u.success_count, 0) AS success_count,
-         COALESCE(e.error_count, 0) AS error_count,
-         COALESCE(u.token_consumed, 0) AS token_consumed
-  FROM usage_buckets u
-  FULL OUTER JOIN error_buckets e ON u.bucket = e.bucket
+  SELECT
+    bucket,
+    SUM(success_count) AS success_count,
+    SUM(error_count) AS error_count,
+    SUM(token_consumed) AS token_consumed,
+    SUM(switch_count) AS switch_count
+  FROM (
+    SELECT bucket, success_count, 0 AS error_count, token_consumed, 0 AS switch_count
+    FROM usage_buckets
+    UNION ALL
+    SELECT bucket, 0, error_count, 0, 0
+    FROM error_buckets
+    UNION ALL
+    SELECT bucket, 0, 0, 0, switch_count
+    FROM switch_buckets
+  ) t
+  GROUP BY bucket
 )
 SELECT
  bucket,
  (success_count + error_count) AS request_count,
-  token_consumed
+  token_consumed,
+  switch_count
 FROM combined
 ORDER BY bucket ASC`

@@ -84,13 +110,18 @@ ORDER BY bucket ASC`
 		var bucket time.Time
 		var requests int64
 		var tokens sql.NullInt64
-		if err := rows.Scan(&bucket, &requests, &tokens); err != nil {
+		var switches sql.NullInt64
+		if err := rows.Scan(&bucket, &requests, &tokens, &switches); err != nil {
 			return nil, err
 		}
 		tokenConsumed := int64(0)
 		if tokens.Valid {
 			tokenConsumed = tokens.Int64
 		}
+		switchCount := int64(0)
+		if switches.Valid {
+			switchCount = switches.Int64
+		}

 		denom := float64(bucketSeconds)
 		if denom <= 0 {
@@ -103,6 +134,7 @@ ORDER BY bucket ASC`
 			BucketStart:   bucket.UTC(),
 			RequestCount:  requests,
 			TokenConsumed: tokenConsumed,
+			SwitchCount:   switchCount,
 			QPS:           qps,
 			TPS:           tps,
 		})
@@ -385,6 +417,7 @@ func fillOpsThroughputBuckets(start, end time.Time, bucketSeconds int, points []
 			BucketStart:   cursor,
 			RequestCount:  0,
 			TokenConsumed: 0,
+			SwitchCount:   0,
 			QPS:           0,
 			TPS:           0,
 		})
--- a/backend/internal/server/api_contract_test.go
+++ b/backend/internal/server/api_contract_test.go
@@ -83,6 +83,9 @@ func TestAPIContracts(t *testing.T) {
 					"status": "active",
 					"ip_whitelist": null,
 					"ip_blacklist": null,
+					"quota": 0,
+					"quota_used": 0,
+					"expires_at": null,
 					"created_at": "2025-01-02T03:04:05Z",
 					"updated_at": "2025-01-02T03:04:05Z"
 				}
@@ -119,6 +122,9 @@ func TestAPIContracts(t *testing.T) {
 							"status": "active",
 							"ip_whitelist": null,
 							"ip_blacklist": null,
+							"quota": 0,
+							"quota_used": 0,
+							"expires_at": null,
 							"created_at": "2025-01-02T03:04:05Z",
 							"updated_at": "2025-01-02T03:04:05Z"
 						}
@@ -180,6 +186,7 @@ func TestAPIContracts(t *testing.T) {
 						"image_price_4k": null,
 						"claude_code_only": false,
 						"fallback_group_id": null,
+						"fallback_group_id_on_invalid_request": null,
 						"created_at": "2025-01-02T03:04:05Z",
 						"updated_at": "2025-01-02T03:04:05Z"
 					}
@@ -601,7 +608,7 @@ func newContractDeps(t *testing.T) *contractDeps {
 	settingService := service.NewSettingService(settingRepo, cfg)

 	adminService := service.NewAdminService(userRepo, groupRepo, &accountRepo, proxyRepo, apiKeyRepo, redeemRepo, nil, nil, nil, nil)
-	authHandler := handler.NewAuthHandler(cfg, nil, userService, settingService, nil, nil, nil)
+	authHandler := handler.NewAuthHandler(cfg, nil, userService, settingService, nil, redeemService, nil)
 	apiKeyHandler := handler.NewAPIKeyHandler(apiKeyService)
 	usageHandler := handler.NewUsageHandler(usageService, apiKeyService)
 	adminSettingHandler := adminhandler.NewSettingHandler(settingService, nil, nil, nil)
@@ -1442,6 +1449,10 @@ func (r *stubApiKeyRepo) ListKeysByGroupID(ctx context.Context, groupID int64) (
 	return nil, errors.New("not implemented")
 }

+func (r *stubApiKeyRepo) IncrementQuotaUsed(ctx context.Context, id int64, amount float64) (float64, error) {
+	return 0, errors.New("not implemented")
+}
+
 type stubUsageLogRepo struct {
 	userLogs map[int64][]service.UsageLog
 }
--- a/backend/internal/server/middleware/api_key_auth.go
+++ b/backend/internal/server/middleware/api_key_auth.go
@@ -70,7 +70,27 @@ func apiKeyAuthWithSubscription(apiKeyService *service.APIKeyService, subscripti

 		// 检查API key是否激活
 		if !apiKey.IsActive() {
-			AbortWithError(c, 401, "API_KEY_DISABLED", "API key is disabled")
+			// Provide more specific error message based on status
+			switch apiKey.Status {
+			case service.StatusAPIKeyQuotaExhausted:
+				AbortWithError(c, 429, "API_KEY_QUOTA_EXHAUSTED", "API key 额度已用完")
+			case service.StatusAPIKeyExpired:
+				AbortWithError(c, 403, "API_KEY_EXPIRED", "API key 已过期")
+			default:
+				AbortWithError(c, 401, "API_KEY_DISABLED", "API key is disabled")
+			}
+			return
+		}
+
+		// 检查API Key是否过期（即使状态是active，也要检查时间）
+		if apiKey.IsExpired() {
+			AbortWithError(c, 403, "API_KEY_EXPIRED", "API key 已过期")
+			return
+		}
+
+		// 检查API Key配额是否耗尽
+		if apiKey.IsQuotaExhausted() {
+			AbortWithError(c, 429, "API_KEY_QUOTA_EXHAUSTED", "API key 额度已用完")
 			return
 		}

--- a/backend/internal/server/middleware/api_key_auth_google.go
+++ b/backend/internal/server/middleware/api_key_auth_google.go
@@ -26,7 +26,7 @@ func APIKeyAuthWithSubscriptionGoogle(apiKeyService *service.APIKeyService, subs
 			abortWithGoogleError(c, 400, "Query parameter api_key is deprecated. Use Authorization header or key instead.")
 			return
 		}
-		apiKeyString := extractAPIKeyFromRequest(c)
+		apiKeyString := extractAPIKeyForGoogle(c)
 		if apiKeyString == "" {
 			abortWithGoogleError(c, 401, "API key is required")
 			return
@@ -108,25 +108,38 @@ func APIKeyAuthWithSubscriptionGoogle(apiKeyService *service.APIKeyService, subs
 	}
 }

-func extractAPIKeyFromRequest(c *gin.Context) string {
-	authHeader := c.GetHeader("Authorization")
-	if authHeader != "" {
-		parts := strings.SplitN(authHeader, " ", 2)
-		if len(parts) == 2 && parts[0] == "Bearer" && strings.TrimSpace(parts[1]) != "" {
-			return strings.TrimSpace(parts[1])
+// extractAPIKeyForGoogle extracts API key for Google/Gemini endpoints.
+// Priority: x-goog-api-key > Authorization: Bearer > x-api-key > query key
+// This allows OpenClaw and other clients using Bearer auth to work with Gemini endpoints.
+func extractAPIKeyForGoogle(c *gin.Context) string {
+	// 1) preferred: Gemini native header
+	if k := strings.TrimSpace(c.GetHeader("x-goog-api-key")); k != "" {
+		return k
+	}
+
+	// 2) fallback: Authorization: Bearer <key>
+	auth := strings.TrimSpace(c.GetHeader("Authorization"))
+	if auth != "" {
+		parts := strings.SplitN(auth, " ", 2)
+		if len(parts) == 2 && strings.EqualFold(parts[0], "Bearer") {
+			if k := strings.TrimSpace(parts[1]); k != "" {
+				return k
+			}
 		}
 	}
-	if v := strings.TrimSpace(c.GetHeader("x-api-key")); v != "" {
-		return v
-	}
-	if v := strings.TrimSpace(c.GetHeader("x-goog-api-key")); v != "" {
-		return v
+
+	// 3) x-api-key header (backward compatibility)
+	if k := strings.TrimSpace(c.GetHeader("x-api-key")); k != "" {
+		return k
 	}
+
+	// 4) query parameter key (for specific paths)
 	if allowGoogleQueryKey(c.Request.URL.Path) {
 		if v := strings.TrimSpace(c.Query("key")); v != "" {
 			return v
 		}
 	}
+
 	return ""
 }

--- a/backend/internal/server/middleware/api_key_auth_google_test.go
+++ b/backend/internal/server/middleware/api_key_auth_google_test.go
@@ -75,6 +75,9 @@ func (f fakeAPIKeyRepo) ListKeysByUserID(ctx context.Context, userID int64) ([]s
 func (f fakeAPIKeyRepo) ListKeysByGroupID(ctx context.Context, groupID int64) ([]string, error) {
 	return nil, errors.New("not implemented")
 }
+func (f fakeAPIKeyRepo) IncrementQuotaUsed(ctx context.Context, id int64, amount float64) (float64, error) {
+	return 0, errors.New("not implemented")
+}

 type googleErrorResponse struct {
 	Error struct {
--- a/backend/internal/server/middleware/api_key_auth_test.go
+++ b/backend/internal/server/middleware/api_key_auth_test.go
@@ -319,6 +319,10 @@ func (r *stubApiKeyRepo) ListKeysByGroupID(ctx context.Context, groupID int64) (
 	return nil, errors.New("not implemented")
 }

+func (r *stubApiKeyRepo) IncrementQuotaUsed(ctx context.Context, id int64, amount float64) (float64, error) {
+	return 0, errors.New("not implemented")
+}
+
 type stubUserSubscriptionRepo struct {
 	getActive      func(ctx context.Context, userID, groupID int64) (*service.UserSubscription, error)
 	updateStatus   func(ctx context.Context, subscriptionID int64, status string) error
--- a/backend/internal/service/admin_service.go
+++ b/backend/internal/service/admin_service.go
@@ -111,9 +111,14 @@ type CreateGroupInput struct {
 	ImagePrice4K    *float64
 	ClaudeCodeOnly  bool   // 仅允许 Claude Code 客户端
 	FallbackGroupID *int64 // 降级分组 ID
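+	// Invalid-request fallback group ID. May be set on anthropic or antigravity groups; the fallback target itself must be an anthropic group.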
+	FallbackGroupIDOnInvalidRequest *int64
 	// 模型路由配置（仅 anthropic 平台使用）
 	ModelRouting        map[string][]int64
 	ModelRoutingEnabled bool // 是否启用模型路由
+	MCPXMLInject        *bool
+	// 支持的模型系列（仅 antigravity 平台使用）
+	SupportedModelScopes []string
 	// 从指定分组复制账号（创建分组后在同一事务内绑定）
 	CopyAccountsFromGroupIDs []int64
 }
@@ -135,9 +140,14 @@ type UpdateGroupInput struct {
 	ImagePrice4K    *float64
 	ClaudeCodeOnly  *bool  // 仅允许 Claude Code 客户端
 	FallbackGroupID *int64 // 降级分组 ID
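+	// Invalid-request fallback group ID. May be set on anthropic or antigravity groups; the fallback target itself must be an anthropic group.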
+	FallbackGroupIDOnInvalidRequest *int64
 	// 模型路由配置（仅 anthropic 平台使用）
 	ModelRouting        map[string][]int64
 	ModelRoutingEnabled *bool // 是否启用模型路由
+	MCPXMLInject        *bool
+	// 支持的模型系列（仅 antigravity 平台使用）
+	SupportedModelScopes *[]string
 	// 从指定分组复制账号（同步操作：先清空当前分组的账号绑定，再绑定源分组的账号）
 	CopyAccountsFromGroupIDs []int64
 }
@@ -594,6 +604,22 @@ func (s *adminServiceImpl) CreateGroup(ctx context.Context, input *CreateGroupIn
 			return nil, err
 		}
 	}
+	fallbackOnInvalidRequest := input.FallbackGroupIDOnInvalidRequest
+	if fallbackOnInvalidRequest != nil && *fallbackOnInvalidRequest <= 0 {
+		fallbackOnInvalidRequest = nil
+	}
+	// 校验无效请求兜底分组
+	if fallbackOnInvalidRequest != nil {
+		if err := s.validateFallbackGroupOnInvalidRequest(ctx, 0, platform, subscriptionType, *fallbackOnInvalidRequest); err != nil {
+			return nil, err
+		}
+	}
+
+	// MCPXMLInject：默认为 true，仅当显式传入 false 时关闭
+	mcpXMLInject := true
+	if input.MCPXMLInject != nil {
+		mcpXMLInject = *input.MCPXMLInject
+	}

 	// 如果指定了复制账号的源分组，先获取账号 ID 列表
 	var accountIDsToCopy []int64
@@ -628,22 +654,25 @@ func (s *adminServiceImpl) CreateGroup(ctx context.Context, input *CreateGroupIn
 	}

 	group := &Group{
-		Name:             input.Name,
-		Description:      input.Description,
-		Platform:         platform,
-		RateMultiplier:   input.RateMultiplier,
-		IsExclusive:      input.IsExclusive,
-		Status:           StatusActive,
-		SubscriptionType: subscriptionType,
-		DailyLimitUSD:    dailyLimit,
-		WeeklyLimitUSD:   weeklyLimit,
-		MonthlyLimitUSD:  monthlyLimit,
-		ImagePrice1K:     imagePrice1K,
-		ImagePrice2K:     imagePrice2K,
-		ImagePrice4K:     imagePrice4K,
-		ClaudeCodeOnly:   input.ClaudeCodeOnly,
-		FallbackGroupID:  input.FallbackGroupID,
-		ModelRouting:     input.ModelRouting,
+		Name:                            input.Name,
+		Description:                     input.Description,
+		Platform:                        platform,
+		RateMultiplier:                  input.RateMultiplier,
+		IsExclusive:                     input.IsExclusive,
+		Status:                          StatusActive,
+		SubscriptionType:                subscriptionType,
+		DailyLimitUSD:                   dailyLimit,
+		WeeklyLimitUSD:                  weeklyLimit,
+		MonthlyLimitUSD:                 monthlyLimit,
+		ImagePrice1K:                    imagePrice1K,
+		ImagePrice2K:                    imagePrice2K,
+		ImagePrice4K:                    imagePrice4K,
+		ClaudeCodeOnly:                  input.ClaudeCodeOnly,
+		FallbackGroupID:                 input.FallbackGroupID,
+		FallbackGroupIDOnInvalidRequest: fallbackOnInvalidRequest,
+		ModelRouting:                    input.ModelRouting,
+		MCPXMLInject:                    mcpXMLInject,
+		SupportedModelScopes:            input.SupportedModelScopes,
 	}
 	if err := s.groupRepo.Create(ctx, group); err != nil {
 		return nil, err
@@ -714,6 +743,37 @@ func (s *adminServiceImpl) validateFallbackGroup(ctx context.Context, currentGro
 	}
 }

+// validateFallbackGroupOnInvalidRequest checks that fallbackGroupID may be used as the invalid-request fallback of a group.
+// currentGroupID: ID of the group being configured (0 when it is being created); used to reject self-reference.
+// platform/subscriptionType: effective platform and subscription type of the group being configured (anthropic or antigravity, non-subscription).
+// fallbackGroupID: ID of the candidate fallback group; it must exist, be a non-subscription anthropic group, and have no invalid-request fallback of its own.
+func (s *adminServiceImpl) validateFallbackGroupOnInvalidRequest(ctx context.Context, currentGroupID int64, platform, subscriptionType string, fallbackGroupID int64) error {
+	if platform != PlatformAnthropic && platform != PlatformAntigravity {
+		return fmt.Errorf("invalid request fallback only supported for anthropic or antigravity groups")
+	}
+	if subscriptionType == SubscriptionTypeSubscription {
+		return fmt.Errorf("subscription groups cannot set invalid request fallback")
+	}
+	if currentGroupID > 0 && currentGroupID == fallbackGroupID {
+		return fmt.Errorf("cannot set self as invalid request fallback group")
+	}
+
+	fallbackGroup, err := s.groupRepo.GetByIDLite(ctx, fallbackGroupID)
+	if err != nil {
+		return fmt.Errorf("fallback group not found: %w", err)
+	}
+	if fallbackGroup.Platform != PlatformAnthropic {
+		return fmt.Errorf("fallback group must be anthropic platform")
+	}
+	if fallbackGroup.SubscriptionType == SubscriptionTypeSubscription {
+		return fmt.Errorf("fallback group cannot be subscription type")
+	}
+	if fallbackGroup.FallbackGroupIDOnInvalidRequest != nil {
+		return fmt.Errorf("fallback group cannot have invalid request fallback configured")
+	}
+	return nil
+}
+
 func (s *adminServiceImpl) UpdateGroup(ctx context.Context, id int64, input *UpdateGroupInput) (*Group, error) {
 	group, err := s.groupRepo.GetByID(ctx, id)
 	if err != nil {
@@ -780,6 +840,20 @@ func (s *adminServiceImpl) UpdateGroup(ctx context.Context, id int64, input *Upd
 			group.FallbackGroupID = nil
 		}
 	}
+	fallbackOnInvalidRequest := group.FallbackGroupIDOnInvalidRequest
+	if input.FallbackGroupIDOnInvalidRequest != nil {
+		if *input.FallbackGroupIDOnInvalidRequest > 0 {
+			fallbackOnInvalidRequest = input.FallbackGroupIDOnInvalidRequest
+		} else {
+			fallbackOnInvalidRequest = nil
+		}
+	}
+	if fallbackOnInvalidRequest != nil {
+		if err := s.validateFallbackGroupOnInvalidRequest(ctx, id, group.Platform, group.SubscriptionType, *fallbackOnInvalidRequest); err != nil {
+			return nil, err
+		}
+	}
+	group.FallbackGroupIDOnInvalidRequest = fallbackOnInvalidRequest

 	// 模型路由配置
 	if input.ModelRouting != nil {
@@ -788,6 +862,14 @@ func (s *adminServiceImpl) UpdateGroup(ctx context.Context, id int64, input *Upd
 	if input.ModelRoutingEnabled != nil {
 		group.ModelRoutingEnabled = *input.ModelRoutingEnabled
 	}
+	if input.MCPXMLInject != nil {
+		group.MCPXMLInject = *input.MCPXMLInject
+	}
+
+	// 支持的模型系列（仅 antigravity 平台使用）
+	if input.SupportedModelScopes != nil {
+		group.SupportedModelScopes = *input.SupportedModelScopes
+	}

 	if err := s.groupRepo.Update(ctx, group); err != nil {
 		return nil, err
--- a/backend/internal/service/admin_service_group_test.go
+++ b/backend/internal/service/admin_service_group_test.go
@@ -394,3 +394,382 @@ func (s *groupRepoStubForFallbackCycle) BindAccountsToGroup(_ context.Context, _
 func (s *groupRepoStubForFallbackCycle) GetAccountIDsByGroupIDs(_ context.Context, _ []int64) ([]int64, error) {
 	panic("unexpected GetAccountIDsByGroupIDs call")
 }
+
+type groupRepoStubForInvalidRequestFallback struct {
+	groups  map[int64]*Group // fixture groups returned by GetByID/GetByIDLite, keyed by ID
+	created *Group           // group most recently passed to Create (nil if never called)
+	updated *Group           // group most recently passed to Update (nil if never called)
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) Create(_ context.Context, g *Group) error {
+	s.created = g
+	return nil
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) Update(_ context.Context, g *Group) error {
+	s.updated = g
+	return nil
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) GetByID(ctx context.Context, id int64) (*Group, error) {
+	return s.GetByIDLite(ctx, id)
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) GetByIDLite(_ context.Context, id int64) (*Group, error) {
+	if g, ok := s.groups[id]; ok {
+		return g, nil
+	}
+	return nil, ErrGroupNotFound
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) Delete(_ context.Context, _ int64) error {
+	panic("unexpected Delete call")
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) DeleteCascade(_ context.Context, _ int64) ([]int64, error) {
+	panic("unexpected DeleteCascade call")
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) List(_ context.Context, _ pagination.PaginationParams) ([]Group, *pagination.PaginationResult, error) {
+	panic("unexpected List call")
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) ListWithFilters(_ context.Context, _ pagination.PaginationParams, _, _, _ string, _ *bool) ([]Group, *pagination.PaginationResult, error) {
+	panic("unexpected ListWithFilters call")
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) ListActive(_ context.Context) ([]Group, error) {
+	panic("unexpected ListActive call")
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) ListActiveByPlatform(_ context.Context, _ string) ([]Group, error) {
+	panic("unexpected ListActiveByPlatform call")
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) ExistsByName(_ context.Context, _ string) (bool, error) {
+	panic("unexpected ExistsByName call")
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) GetAccountCount(_ context.Context, _ int64) (int64, error) {
+	panic("unexpected GetAccountCount call")
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) DeleteAccountGroupsByGroupID(_ context.Context, _ int64) (int64, error) {
+	panic("unexpected DeleteAccountGroupsByGroupID call")
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) GetAccountIDsByGroupIDs(_ context.Context, _ []int64) ([]int64, error) {
+	panic("unexpected GetAccountIDsByGroupIDs call")
+}
+
+func (s *groupRepoStubForInvalidRequestFallback) BindAccountsToGroup(_ context.Context, _ int64, _ []int64) error {
+	panic("unexpected BindAccountsToGroup call")
+}
+
+func TestAdminService_CreateGroup_InvalidRequestFallbackRejectsUnsupportedPlatform(t *testing.T) {
+	fallbackID := int64(10)
+	repo := &groupRepoStubForInvalidRequestFallback{
+		groups: map[int64]*Group{
+			fallbackID: {ID: fallbackID, Platform: PlatformAnthropic, SubscriptionType: SubscriptionTypeStandard},
+		},
+	}
+	svc := &adminServiceImpl{groupRepo: repo}
+
+	_, err := svc.CreateGroup(context.Background(), &CreateGroupInput{
+		Name:                            "g1",
+		Platform:                        PlatformOpenAI,
+		SubscriptionType:                SubscriptionTypeStandard,
+		FallbackGroupIDOnInvalidRequest: &fallbackID,
+	})
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "invalid request fallback only supported for anthropic or antigravity groups")
+	require.Nil(t, repo.created)
+}
+
+func TestAdminService_CreateGroup_InvalidRequestFallbackRejectsSubscription(t *testing.T) {
+	fallbackID := int64(10)
+	repo := &groupRepoStubForInvalidRequestFallback{
+		groups: map[int64]*Group{
+			fallbackID: {ID: fallbackID, Platform: PlatformAnthropic, SubscriptionType: SubscriptionTypeStandard},
+		},
+	}
+	svc := &adminServiceImpl{groupRepo: repo}
+
+	_, err := svc.CreateGroup(context.Background(), &CreateGroupInput{
+		Name:                            "g1",
+		Platform:                        PlatformAnthropic,
+		SubscriptionType:                SubscriptionTypeSubscription,
+		FallbackGroupIDOnInvalidRequest: &fallbackID,
+	})
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "subscription groups cannot set invalid request fallback")
+	require.Nil(t, repo.created)
+}
+
+func TestAdminService_CreateGroup_InvalidRequestFallbackRejectsFallbackGroup(t *testing.T) {
+	tests := []struct {
+		name        string
+		fallback    *Group
+		wantMessage string
+	}{
+		{
+			name:        "openai_target",
+			fallback:    &Group{ID: 10, Platform: PlatformOpenAI, SubscriptionType: SubscriptionTypeStandard},
+			wantMessage: "fallback group must be anthropic platform",
+		},
+		{
+			name:        "antigravity_target",
+			fallback:    &Group{ID: 10, Platform: PlatformAntigravity, SubscriptionType: SubscriptionTypeStandard},
+			wantMessage: "fallback group must be anthropic platform",
+		},
+		{
+			name:        "subscription_group",
+			fallback:    &Group{ID: 10, Platform: PlatformAnthropic, SubscriptionType: SubscriptionTypeSubscription},
+			wantMessage: "fallback group cannot be subscription type",
+		},
+		{
+			name: "nested_fallback",
+			fallback: &Group{
+				ID:                              10,
+				Platform:                        PlatformAnthropic,
+				SubscriptionType:                SubscriptionTypeStandard,
+				FallbackGroupIDOnInvalidRequest: func() *int64 { v := int64(99); return &v }(),
+			},
+			wantMessage: "fallback group cannot have invalid request fallback configured",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			fallbackID := tc.fallback.ID
+			repo := &groupRepoStubForInvalidRequestFallback{
+				groups: map[int64]*Group{
+					fallbackID: tc.fallback,
+				},
+			}
+			svc := &adminServiceImpl{groupRepo: repo}
+
+			_, err := svc.CreateGroup(context.Background(), &CreateGroupInput{
+				Name:                            "g1",
+				Platform:                        PlatformAnthropic,
+				SubscriptionType:                SubscriptionTypeStandard,
+				FallbackGroupIDOnInvalidRequest: &fallbackID,
+			})
+			require.Error(t, err)
+			require.Contains(t, err.Error(), tc.wantMessage)
+			require.Nil(t, repo.created)
+		})
+	}
+}
+
+func TestAdminService_CreateGroup_InvalidRequestFallbackNotFound(t *testing.T) {
+	fallbackID := int64(10)
+	repo := &groupRepoStubForInvalidRequestFallback{}
+	svc := &adminServiceImpl{groupRepo: repo}
+
+	_, err := svc.CreateGroup(context.Background(), &CreateGroupInput{
+		Name:                            "g1",
+		Platform:                        PlatformAnthropic,
+		SubscriptionType:                SubscriptionTypeStandard,
+		FallbackGroupIDOnInvalidRequest: &fallbackID,
+	})
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "fallback group not found")
+	require.Nil(t, repo.created)
+}
+
+func TestAdminService_CreateGroup_InvalidRequestFallbackAllowsAntigravity(t *testing.T) {
+	fallbackID := int64(10)
+	repo := &groupRepoStubForInvalidRequestFallback{
+		groups: map[int64]*Group{
+			fallbackID: {ID: fallbackID, Platform: PlatformAnthropic, SubscriptionType: SubscriptionTypeStandard},
+		},
+	}
+	svc := &adminServiceImpl{groupRepo: repo}
+
+	group, err := svc.CreateGroup(context.Background(), &CreateGroupInput{
+		Name:                            "g1",
+		Platform:                        PlatformAntigravity,
+		SubscriptionType:                SubscriptionTypeStandard,
+		FallbackGroupIDOnInvalidRequest: &fallbackID,
+	})
+	require.NoError(t, err)
+	require.NotNil(t, group)
+	require.NotNil(t, repo.created)
+	require.Equal(t, fallbackID, *repo.created.FallbackGroupIDOnInvalidRequest)
+}
+
+func TestAdminService_CreateGroup_InvalidRequestFallbackClearsOnZero(t *testing.T) {
+	zero := int64(0)
+	repo := &groupRepoStubForInvalidRequestFallback{}
+	svc := &adminServiceImpl{groupRepo: repo}
+
+	group, err := svc.CreateGroup(context.Background(), &CreateGroupInput{
+		Name:                            "g1",
+		Platform:                        PlatformAnthropic,
+		SubscriptionType:                SubscriptionTypeStandard,
+		FallbackGroupIDOnInvalidRequest: &zero,
+	})
+	require.NoError(t, err)
+	require.NotNil(t, group)
+	require.NotNil(t, repo.created)
+	require.Nil(t, repo.created.FallbackGroupIDOnInvalidRequest)
+}
+
+func TestAdminService_UpdateGroup_InvalidRequestFallbackPlatformMismatch(t *testing.T) {
+	fallbackID := int64(10)
+	existing := &Group{
+		ID:                              1,
+		Name:                            "g1",
+		Platform:                        PlatformAnthropic,
+		SubscriptionType:                SubscriptionTypeStandard,
+		Status:                          StatusActive,
+		FallbackGroupIDOnInvalidRequest: &fallbackID,
+	}
+	repo := &groupRepoStubForInvalidRequestFallback{
+		groups: map[int64]*Group{
+			existing.ID: existing,
+			fallbackID:  {ID: fallbackID, Platform: PlatformAnthropic, SubscriptionType: SubscriptionTypeStandard},
+		},
+	}
+	svc := &adminServiceImpl{groupRepo: repo}
+
+	_, err := svc.UpdateGroup(context.Background(), existing.ID, &UpdateGroupInput{
+		Platform: PlatformOpenAI,
+	})
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "invalid request fallback only supported for anthropic or antigravity groups")
+	require.Nil(t, repo.updated)
+}
+
+func TestAdminService_UpdateGroup_InvalidRequestFallbackSubscriptionMismatch(t *testing.T) {
+	fallbackID := int64(10)
+	existing := &Group{
+		ID:                              1,
+		Name:                            "g1",
+		Platform:                        PlatformAnthropic,
+		SubscriptionType:                SubscriptionTypeStandard,
+		Status:                          StatusActive,
+		FallbackGroupIDOnInvalidRequest: &fallbackID,
+	}
+	repo := &groupRepoStubForInvalidRequestFallback{
+		groups: map[int64]*Group{
+			existing.ID: existing,
+			fallbackID:  {ID: fallbackID, Platform: PlatformAnthropic, SubscriptionType: SubscriptionTypeStandard},
+		},
+	}
+	svc := &adminServiceImpl{groupRepo: repo}
+
+	_, err := svc.UpdateGroup(context.Background(), existing.ID, &UpdateGroupInput{
+		SubscriptionType: SubscriptionTypeSubscription,
+	})
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "subscription groups cannot set invalid request fallback")
+	require.Nil(t, repo.updated)
+}
+
+func TestAdminService_UpdateGroup_InvalidRequestFallbackClearsOnZero(t *testing.T) {
+	fallbackID := int64(10)
+	existing := &Group{
+		ID:                              1,
+		Name:                            "g1",
+		Platform:                        PlatformAnthropic,
+		SubscriptionType:                SubscriptionTypeStandard,
+		Status:                          StatusActive,
+		FallbackGroupIDOnInvalidRequest: &fallbackID,
+	}
+	repo := &groupRepoStubForInvalidRequestFallback{
+		groups: map[int64]*Group{
+			existing.ID: existing,
+			fallbackID:  {ID: fallbackID, Platform: PlatformAnthropic, SubscriptionType: SubscriptionTypeStandard},
+		},
+	}
+	svc := &adminServiceImpl{groupRepo: repo}
+
+	zero := int64(0) // a non-positive ID clears the fallback; named zero so the builtin clear is not shadowed
+	group, err := svc.UpdateGroup(context.Background(), existing.ID, &UpdateGroupInput{
+		Platform:                        PlatformOpenAI,
+		FallbackGroupIDOnInvalidRequest: &zero,
+	})
+	require.NoError(t, err)
+	require.NotNil(t, group)
+	require.NotNil(t, repo.updated)
+	require.Nil(t, repo.updated.FallbackGroupIDOnInvalidRequest)
+}
+
+func TestAdminService_UpdateGroup_InvalidRequestFallbackRejectsFallbackGroup(t *testing.T) {
+	fallbackID := int64(10)
+	existing := &Group{
+		ID:               1,
+		Name:             "g1",
+		Platform:         PlatformAnthropic,
+		SubscriptionType: SubscriptionTypeStandard,
+		Status:           StatusActive,
+	}
+	repo := &groupRepoStubForInvalidRequestFallback{
+		groups: map[int64]*Group{
+			existing.ID: existing,
+			fallbackID:  {ID: fallbackID, Platform: PlatformAnthropic, SubscriptionType: SubscriptionTypeSubscription},
+		},
+	}
+	svc := &adminServiceImpl{groupRepo: repo}
+
+	_, err := svc.UpdateGroup(context.Background(), existing.ID, &UpdateGroupInput{
+		FallbackGroupIDOnInvalidRequest: &fallbackID,
+	})
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "fallback group cannot be subscription type")
+	require.Nil(t, repo.updated)
+}
+
+func TestAdminService_UpdateGroup_InvalidRequestFallbackSetSuccess(t *testing.T) {
+	fallbackID := int64(10)
+	existing := &Group{
+		ID:               1,
+		Name:             "g1",
+		Platform:         PlatformAnthropic,
+		SubscriptionType: SubscriptionTypeStandard,
+		Status:           StatusActive,
+	}
+	repo := &groupRepoStubForInvalidRequestFallback{
+		groups: map[int64]*Group{
+			existing.ID: existing,
+			fallbackID:  {ID: fallbackID, Platform: PlatformAnthropic, SubscriptionType: SubscriptionTypeStandard},
+		},
+	}
+	svc := &adminServiceImpl{groupRepo: repo}
+
+	group, err := svc.UpdateGroup(context.Background(), existing.ID, &UpdateGroupInput{
+		FallbackGroupIDOnInvalidRequest: &fallbackID,
+	})
+	require.NoError(t, err)
+	require.NotNil(t, group)
+	require.NotNil(t, repo.updated)
+	require.Equal(t, fallbackID, *repo.updated.FallbackGroupIDOnInvalidRequest)
+}
+
+func TestAdminService_UpdateGroup_InvalidRequestFallbackAllowsAntigravity(t *testing.T) {
+	fallbackID := int64(10)
+	existing := &Group{
+		ID:               1,
+		Name:             "g1",
+		Platform:         PlatformAntigravity,
+		SubscriptionType: SubscriptionTypeStandard,
+		Status:           StatusActive,
+	}
+	repo := &groupRepoStubForInvalidRequestFallback{
+		groups: map[int64]*Group{
+			existing.ID: existing,
+			fallbackID:  {ID: fallbackID, Platform: PlatformAnthropic, SubscriptionType: SubscriptionTypeStandard},
+		},
+	}
+	svc := &adminServiceImpl{groupRepo: repo}
+
+	group, err := svc.UpdateGroup(context.Background(), existing.ID, &UpdateGroupInput{
+		FallbackGroupIDOnInvalidRequest: &fallbackID,
+	})
+	require.NoError(t, err)
+	require.NotNil(t, group)
+	require.NotNil(t, repo.updated)
+	require.Equal(t, fallbackID, *repo.updated.FallbackGroupIDOnInvalidRequest)
+}
--- a/backend/internal/service/antigravity_gateway_service.go
+++ b/backend/internal/service/antigravity_gateway_service.go
@@ -13,23 +13,34 @@ import (
 	"net"
 	"net/http"
 	"os"
+	"strconv"
 	"strings"
 	"sync/atomic"
 	"time"

 	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
 )

 const (
-	antigravityStickySessionTTL = time.Hour
-	antigravityMaxRetries       = 3
-	antigravityRetryBaseDelay   = 1 * time.Second
-	antigravityRetryMaxDelay    = 16 * time.Second
+	antigravityStickySessionTTL  = time.Hour
+	antigravityDefaultMaxRetries = 3
+	antigravityRetryBaseDelay    = 1 * time.Second
+	antigravityRetryMaxDelay     = 16 * time.Second
 )

-const antigravityScopeRateLimitEnv = "GATEWAY_ANTIGRAVITY_429_SCOPE_LIMIT"
+const (
+	antigravityMaxRetriesEnv            = "GATEWAY_ANTIGRAVITY_MAX_RETRIES"
+	antigravityMaxRetriesAfterSwitchEnv = "GATEWAY_ANTIGRAVITY_AFTER_SWITCHMAX_RETRIES" // NOTE(review): no "_" between SWITCH and MAX, unlike the sibling names — confirm intended before renaming (operator-facing)
+	antigravityMaxRetriesClaudeEnv      = "GATEWAY_ANTIGRAVITY_MAX_RETRIES_CLAUDE"
+	antigravityMaxRetriesGeminiTextEnv  = "GATEWAY_ANTIGRAVITY_MAX_RETRIES_GEMINI_TEXT"
+	antigravityMaxRetriesGeminiImageEnv = "GATEWAY_ANTIGRAVITY_MAX_RETRIES_GEMINI_IMAGE"
+	antigravityScopeRateLimitEnv        = "GATEWAY_ANTIGRAVITY_429_SCOPE_LIMIT"
+	antigravityBillingModelEnv          = "GATEWAY_ANTIGRAVITY_BILL_WITH_MAPPED_MODEL"
+	antigravityFallbackSecondsEnv       = "GATEWAY_ANTIGRAVITY_FALLBACK_COOLDOWN_SECONDS"
+)

 // antigravityRetryLoopParams 重试循环的参数
 type antigravityRetryLoopParams struct {
@@ -41,6 +52,7 @@ type antigravityRetryLoopParams struct {
 	action         string
 	body           []byte
 	quotaScope     AntigravityQuotaScope
+	maxRetries     int
 	c              *gin.Context
 	httpUpstream   HTTPUpstream
 	settingService *SettingService
@@ -52,11 +64,28 @@ type antigravityRetryLoopResult struct {
 	resp *http.Response
 }

+// PromptTooLongError reports that the upstream explicitly rejected the request as "prompt too long".
+type PromptTooLongError struct {
+	StatusCode int    // HTTP status code of the upstream response
+	RequestID  string // x-request-id header of the upstream response
+	Body       []byte // upstream response body
+}
+
+func (e *PromptTooLongError) Error() string {
+	return fmt.Sprint("prompt too long: status=", e.StatusCode)
+}
+
 // antigravityRetryLoop 执行带 URL fallback 的重试循环
 func antigravityRetryLoop(p antigravityRetryLoopParams) (*antigravityRetryLoopResult, error) {
-	availableURLs := antigravity.DefaultURLAvailability.GetAvailableURLs()
+	baseURLs := antigravity.ForwardBaseURLs()
+	availableURLs := antigravity.DefaultURLAvailability.GetAvailableURLsWithBase(baseURLs)
 	if len(availableURLs) == 0 {
-		availableURLs = antigravity.BaseURLs
+		availableURLs = baseURLs
+	}
+
+	maxRetries := p.maxRetries
+	if maxRetries <= 0 {
+		maxRetries = antigravityDefaultMaxRetries
 	}

 	var resp *http.Response
@@ -76,7 +105,7 @@ func antigravityRetryLoop(p antigravityRetryLoopParams) (*antigravityRetryLoopRe
 urlFallbackLoop:
 	for urlIdx, baseURL := range availableURLs {
 		usedBaseURL = baseURL
-		for attempt := 1; attempt <= antigravityMaxRetries; attempt++ {
+		for attempt := 1; attempt <= maxRetries; attempt++ {
 			select {
 			case <-p.ctx.Done():
 				log.Printf("%s status=context_canceled error=%v", p.prefix, p.ctx.Err())
@@ -109,8 +138,8 @@ urlFallbackLoop:
 					log.Printf("%s URL fallback (connection error): %s -> %s", p.prefix, baseURL, availableURLs[urlIdx+1])
 					continue urlFallbackLoop
 				}
-				if attempt < antigravityMaxRetries {
-					log.Printf("%s status=request_failed retry=%d/%d error=%v", p.prefix, attempt, antigravityMaxRetries, err)
+				if attempt < maxRetries {
+					log.Printf("%s status=request_failed retry=%d/%d error=%v", p.prefix, attempt, maxRetries, err)
 					if !sleepAntigravityBackoffWithContext(p.ctx, attempt) {
 						log.Printf("%s status=context_canceled_during_backoff", p.prefix)
 						return nil, p.ctx.Err()
@@ -134,7 +163,7 @@ urlFallbackLoop:
 				}

 				// 账户/模型配额限流，重试 3 次（指数退避）
-				if attempt < antigravityMaxRetries {
+				if attempt < maxRetries {
 					upstreamMsg := strings.TrimSpace(extractAntigravityErrorMessage(respBody))
 					upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
 					appendOpsUpstreamError(p.c, OpsUpstreamErrorEvent{
@@ -147,7 +176,7 @@ urlFallbackLoop:
 						Message:            upstreamMsg,
 						Detail:             getUpstreamDetail(respBody),
 					})
-					log.Printf("%s status=429 retry=%d/%d body=%s", p.prefix, attempt, antigravityMaxRetries, truncateForLog(respBody, 200))
+					log.Printf("%s status=429 retry=%d/%d body=%s", p.prefix, attempt, maxRetries, truncateForLog(respBody, 200))
 					if !sleepAntigravityBackoffWithContext(p.ctx, attempt) {
 						log.Printf("%s status=context_canceled_during_backoff", p.prefix)
 						return nil, p.ctx.Err()
@@ -171,7 +200,7 @@ urlFallbackLoop:
 				respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
 				_ = resp.Body.Close()

-				if attempt < antigravityMaxRetries {
+				if attempt < maxRetries {
 					upstreamMsg := strings.TrimSpace(extractAntigravityErrorMessage(respBody))
 					upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
 					appendOpsUpstreamError(p.c, OpsUpstreamErrorEvent{
@@ -184,7 +213,7 @@ urlFallbackLoop:
 						Message:            upstreamMsg,
 						Detail:             getUpstreamDetail(respBody),
 					})
-					log.Printf("%s status=%d retry=%d/%d body=%s", p.prefix, resp.StatusCode, attempt, antigravityMaxRetries, truncateForLog(respBody, 500))
+					log.Printf("%s status=%d retry=%d/%d body=%s", p.prefix, resp.StatusCode, attempt, maxRetries, truncateForLog(respBody, 500))
 					if !sleepAntigravityBackoffWithContext(p.ctx, attempt) {
 						log.Printf("%s status=context_canceled_during_backoff", p.prefix)
 						return nil, p.ctx.Err()
@@ -390,6 +419,11 @@ type TestConnectionResult struct {
 // TestConnection 测试 Antigravity 账号连接（非流式，无重试、无计费）
 // 支持 Claude 和 Gemini 两种协议，根据 modelID 前缀自动选择
 func (s *AntigravityGatewayService) TestConnection(ctx context.Context, account *Account, modelID string) (*TestConnectionResult, error) {
+	// 上游透传账号使用专用测试方法
+	if account.Type == AccountTypeUpstream {
+		return s.testUpstreamConnection(ctx, account, modelID)
+	}
+
 	// 获取 token
 	if s.tokenProvider == nil {
 		return nil, errors.New("antigravity token provider not configured")
@@ -484,6 +518,87 @@ func (s *AntigravityGatewayService) TestConnection(ctx context.Context, account
 	return nil, lastErr
 }

+// testUpstreamConnection tests an upstream pass-through account by POSTing a minimal request to {base_url}/v1/messages.
+func (s *AntigravityGatewayService) testUpstreamConnection(ctx context.Context, account *Account, modelID string) (*TestConnectionResult, error) {
+	baseURL := strings.TrimSpace(account.GetCredential("base_url"))
+	apiKey := strings.TrimSpace(account.GetCredential("api_key"))
+	if baseURL == "" || apiKey == "" {
+		return nil, errors.New("upstream account missing base_url or api_key")
+	}
+	baseURL = strings.TrimSuffix(baseURL, "/")
+
+	// Default to a Claude model when the caller did not specify one
+	if modelID == "" {
+		modelID = "claude-sonnet-4-20250514"
+	}
+
+	// Build a minimal test request (one-character prompt, max_tokens = 1)
+	testReq := map[string]any{
+		"model":      modelID,
+		"max_tokens": 1,
+		"messages": []map[string]any{
+			{"role": "user", "content": "."},
+		},
+	}
+	requestBody, err := json.Marshal(testReq)
+	if err != nil {
+		return nil, fmt.Errorf("构建请求失败: %w", err)
+	}
+
+	// Build the HTTP request; the key is sent both as a Bearer token and as x-api-key
+	upstreamURL := baseURL + "/v1/messages"
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, upstreamURL, bytes.NewReader(requestBody))
+	if err != nil {
+		return nil, fmt.Errorf("创建请求失败: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	req.Header.Set("x-api-key", apiKey)
+	req.Header.Set("anthropic-version", "2023-06-01")
+
+	// Proxy URL (left empty unless the account has a proxy attached)
+	proxyURL := ""
+	if account.ProxyID != nil && account.Proxy != nil {
+		proxyURL = account.Proxy.URL()
+	}
+
+	log.Printf("[antigravity-Test-Upstream] account=%s url=%s", account.Name, upstreamURL)
+
+	// Send the request
+	resp, err := s.httpUpstream.Do(req, proxyURL, account.ID, account.Concurrency)
+	if err != nil {
+		return nil, fmt.Errorf("请求失败: %w", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	respBody, err := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
+	if err != nil {
+		return nil, fmt.Errorf("读取响应失败: %w", err)
+	}
+
+	if resp.StatusCode >= 400 {
+		return nil, fmt.Errorf("API 返回 %d: %s", resp.StatusCode, string(respBody))
+	}
+
+	// Extract the text of the first content block; text stays empty if the body does not have that shape
+	var respData map[string]any
+	text := ""
+	if json.Unmarshal(respBody, &respData) == nil {
+		if content, ok := respData["content"].([]any); ok && len(content) > 0 {
+			if block, ok := content[0].(map[string]any); ok {
+				if t, ok := block["text"].(string); ok {
+					text = t
+				}
+			}
+		}
+	}
+
+	return &TestConnectionResult{
+		Text:        text,
+		MappedModel: modelID,
+	}, nil
+}
+
 // buildGeminiTestRequest 构建 Gemini 格式测试请求
 // 使用最小 token 消耗：输入 "." + maxOutputTokens: 1
 func (s *AntigravityGatewayService) buildGeminiTestRequest(projectID, model string) ([]byte, error) {
@@ -534,6 +649,10 @@ func (s *AntigravityGatewayService) getClaudeTransformOptions(ctx context.Contex
 	}
 	opts.EnableIdentityPatch = s.settingService.IsIdentityPatchEnabled(ctx)
 	opts.IdentityPatch = s.settingService.GetIdentityPatchPrompt(ctx)
+
+	if group, ok := ctx.Value(ctxkey.Group).(*Group); ok && group != nil {
+		opts.EnableMCPXML = group.MCPXMLInject
+	}
 	return opts
 }

@@ -702,6 +821,11 @@ func isModelNotFoundError(statusCode int, body []byte) bool {

 // Forward 转发 Claude 协议请求（Claude → Gemini 转换）
 func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context, account *Account, body []byte) (*ForwardResult, error) {
+	// 上游透传账号直接转发，不走 OAuth token 刷新
+	if account.Type == AccountTypeUpstream {
+		return s.ForwardUpstream(ctx, c, account, body)
+	}
+
 	startTime := time.Now()
 	sessionID := getSessionID(c)
 	prefix := logPrefix(sessionID, account.Name)
@@ -718,6 +842,12 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
 	originalModel := claudeReq.Model
 	mappedModel := s.getMappedModel(account, claudeReq.Model)
 	quotaScope, _ := resolveAntigravityQuotaScope(originalModel)
+	billingModel := originalModel
+	if antigravityUseMappedModelForBilling() && strings.TrimSpace(mappedModel) != "" {
+		billingModel = mappedModel
+	}
+	afterSwitch := antigravityHasAccountSwitch(ctx)
+	maxRetries := antigravityMaxRetriesForModel(originalModel, afterSwitch)

 	// 获取 access_token
 	if s.tokenProvider == nil {
@@ -766,6 +896,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
 		httpUpstream:   s.httpUpstream,
 		settingService: s.settingService,
 		handleError:    s.handleUpstreamError,
+		maxRetries:     maxRetries,
 	})
 	if err != nil {
 		return nil, s.writeClaudeError(c, http.StatusBadGateway, "upstream_error", "Upstream request failed after retries")
@@ -842,6 +973,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
 					httpUpstream:   s.httpUpstream,
 					settingService: s.settingService,
 					handleError:    s.handleUpstreamError,
+					maxRetries:     maxRetries,
 				})
 				if retryErr != nil {
 					appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
@@ -917,6 +1049,39 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,

 		// 处理错误响应（重试后仍失败或不触发重试）
 		if resp.StatusCode >= 400 {
+			if resp.StatusCode == http.StatusBadRequest {
+				upstreamMsg := strings.TrimSpace(extractAntigravityErrorMessage(respBody))
+				upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
+				log.Printf("%s status=400 prompt_too_long=%v upstream_message=%q request_id=%s body=%s", prefix, isPromptTooLongError(respBody), upstreamMsg, resp.Header.Get("x-request-id"), truncateForLog(respBody, 500))
+			}
+			if resp.StatusCode == http.StatusBadRequest && isPromptTooLongError(respBody) {
+				upstreamMsg := strings.TrimSpace(extractAntigravityErrorMessage(respBody))
+				upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
+				logBody := s.settingService != nil && s.settingService.cfg != nil && s.settingService.cfg.Gateway.LogUpstreamErrorBody
+				maxBytes := 2048
+				if s.settingService != nil && s.settingService.cfg != nil && s.settingService.cfg.Gateway.LogUpstreamErrorBodyMaxBytes > 0 {
+					maxBytes = s.settingService.cfg.Gateway.LogUpstreamErrorBodyMaxBytes
+				}
+				upstreamDetail := ""
+				if logBody {
+					upstreamDetail = truncateString(string(respBody), maxBytes)
+				}
+				appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
+					Platform:           account.Platform,
+					AccountID:          account.ID,
+					AccountName:        account.Name,
+					UpstreamStatusCode: resp.StatusCode,
+					UpstreamRequestID:  resp.Header.Get("x-request-id"),
+					Kind:               "prompt_too_long",
+					Message:            upstreamMsg,
+					Detail:             upstreamDetail,
+				})
+				return nil, &PromptTooLongError{
+					StatusCode: resp.StatusCode,
+					RequestID:  resp.Header.Get("x-request-id"),
+					Body:       respBody,
+				}
+			}
 			s.handleUpstreamError(ctx, prefix, account, resp.StatusCode, resp.Header, respBody, quotaScope)

 			if s.shouldFailoverUpstreamError(resp.StatusCode) {
@@ -978,7 +1143,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
 	return &ForwardResult{
 		RequestID:    requestID,
 		Usage:        *usage,
-		Model:        originalModel, // 使用原始模型用于计费和日志
+		Model:        billingModel, // 计费模型（可按映射模型覆盖）
 		Stream:       claudeReq.Stream,
 		Duration:     time.Since(startTime),
 		FirstTokenMs: firstTokenMs,
@@ -1003,24 +1168,64 @@ func isSignatureRelatedError(respBody []byte) bool {
 		return true
 	}

+	// Detect thinking block modification errors:
+	// "thinking or redacted_thinking blocks in the latest assistant message cannot be modified"
+	if strings.Contains(msg, "cannot be modified") && (strings.Contains(msg, "thinking") || strings.Contains(msg, "redacted_thinking")) {
+		return true
+	}
+
 	return false
 }

+// isPromptTooLongError reports whether an upstream error body says the prompt is too long.
+// The check is a case-insensitive substring match for "prompt is too long" against the message
+// returned by extractAntigravityErrorMessage; when that message is blank the raw body is searched instead.
+func isPromptTooLongError(respBody []byte) bool {
+	text := strings.TrimSpace(extractAntigravityErrorMessage(respBody))
+	if text == "" {
+		text = string(respBody)
+	}
+	return strings.Contains(strings.ToLower(text), "prompt is too long")
+}
+
 func extractAntigravityErrorMessage(body []byte) string {
 	var payload map[string]any
 	if err := json.Unmarshal(body, &payload); err != nil {
 		return ""
 	}

+	parseNestedMessage := func(msg string) string {
+		trimmed := strings.TrimSpace(msg)
+		if trimmed == "" || !strings.HasPrefix(trimmed, "{") {
+			return ""
+		}
+		var nested map[string]any
+		if err := json.Unmarshal([]byte(trimmed), &nested); err != nil {
+			return ""
+		}
+		if errObj, ok := nested["error"].(map[string]any); ok {
+			if innerMsg, ok := errObj["message"].(string); ok && strings.TrimSpace(innerMsg) != "" {
+				return innerMsg
+			}
+		}
+		if innerMsg, ok := nested["message"].(string); ok && strings.TrimSpace(innerMsg) != "" {
+			return innerMsg
+		}
+		return ""
+	}
+
 	// Google-style: {"error": {"message": "..."}}
 	if errObj, ok := payload["error"].(map[string]any); ok {
 		if msg, ok := errObj["message"].(string); ok && strings.TrimSpace(msg) != "" {
+			if innerMsg := parseNestedMessage(msg); innerMsg != "" {
+				return innerMsg
+			}
 			return msg
 		}
 	}

 	// Fallback: top-level message
 	if msg, ok := payload["message"].(string); ok && strings.TrimSpace(msg) != "" {
+		if innerMsg := parseNestedMessage(msg); innerMsg != "" {
+			return innerMsg
+		}
 		return msg
 	}

@@ -1248,6 +1453,208 @@ func stripSignatureSensitiveBlocksFromClaudeRequest(req *antigravity.ClaudeReque
 	return changed, nil
 }

+// ForwardUpstream passes the raw Claude-format request body through to an upstream Antigravity-compatible service at base_url + "/v1/messages" and relays its response to the client.
+// It is used for upstream-type accounts: the request is authenticated with the account's base_url and api_key credentials and does not obtain an OAuth token.
+func (s *AntigravityGatewayService) ForwardUpstream(ctx context.Context, c *gin.Context, account *Account, body []byte) (*ForwardResult, error) {
+	startTime := time.Now()
+	sessionID := getSessionID(c)
+	prefix := logPrefix(sessionID, account.Name)
+
+	// Read the upstream endpoint and key from the account credentials; both are required.
+	baseURL := strings.TrimSpace(account.GetCredential("base_url"))
+	apiKey := strings.TrimSpace(account.GetCredential("api_key"))
+	if baseURL == "" || apiKey == "" {
+		return nil, fmt.Errorf("upstream account missing base_url or api_key")
+	}
+	baseURL = strings.TrimSuffix(baseURL, "/")
+
+	// Parse the request only to learn the model and the stream flag; the original body is forwarded unchanged.
+	var claudeReq antigravity.ClaudeRequest
+	if err := json.Unmarshal(body, &claudeReq); err != nil {
+		return nil, fmt.Errorf("parse claude request: %w", err)
+	}
+	if strings.TrimSpace(claudeReq.Model) == "" {
+		return nil, fmt.Errorf("missing model")
+	}
+	originalModel := claudeReq.Model
+	billingModel := originalModel
+
+	// Build the upstream request URL.
+	upstreamURL := baseURL + "/v1/messages"
+
+	// Create the request.
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, upstreamURL, bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("create upstream request: %w", err)
+	}
+
+	// Set request headers; the key is sent both as a Bearer token and as x-api-key.
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	req.Header.Set("x-api-key", apiKey) // Claude API compatibility
+
+	// Pass through the Claude-specific headers of the client request when present.
+	if v := c.GetHeader("anthropic-version"); v != "" {
+		req.Header.Set("anthropic-version", v)
+	}
+	if v := c.GetHeader("anthropic-beta"); v != "" {
+		req.Header.Set("anthropic-beta", v)
+	}
+
+	// Proxy URL; stays empty when the account has no proxy attached.
+	proxyURL := ""
+	if account.ProxyID != nil && account.Proxy != nil {
+		proxyURL = account.Proxy.URL()
+	}
+
+	// Send the request.
+	resp, err := s.httpUpstream.Do(req, proxyURL, account.ID, account.Concurrency)
+	if err != nil {
+		log.Printf("%s upstream request failed: %v", prefix, err)
+		return nil, fmt.Errorf("upstream request failed: %w", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	// Handle error responses (status >= 400); at most 2 MiB of the error body is read.
+	if resp.StatusCode >= 400 {
+		respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
+
+		// On 429, hand the response to handleUpstreamError (presumably marks the account as rate limited — confirm there).
+		if resp.StatusCode == http.StatusTooManyRequests {
+			s.handleUpstreamError(ctx, prefix, account, resp.StatusCode, resp.Header, respBody, AntigravityQuotaScopeClaude)
+		}
+
+		// Relay the upstream error status, content type and body to the client as-is.
+		c.Header("Content-Type", resp.Header.Get("Content-Type"))
+		c.Status(resp.StatusCode)
+		_, _ = c.Writer.Write(respBody)
+
+		return &ForwardResult{
+			Model: billingModel,
+		}, nil
+	}
+
+	// Handle successful responses (streaming / non-streaming).
+	var usage *ClaudeUsage
+	var firstTokenMs *int
+
+	if claudeReq.Stream {
+		// Streaming response: set SSE headers and relay the body line by line.
+		c.Header("Content-Type", "text/event-stream")
+		c.Header("Cache-Control", "no-cache")
+		c.Header("Connection", "keep-alive")
+		c.Header("X-Accel-Buffering", "no")
+		c.Status(http.StatusOK)
+
+		usage, firstTokenMs = s.streamUpstreamResponse(c, resp, startTime)
+	} else {
+		// Non-streaming response: read the whole body and relay it directly.
+		respBody, err := io.ReadAll(resp.Body)
+		if err != nil {
+			return nil, fmt.Errorf("read upstream response: %w", err)
+		}
+
+		// Extract usage from the response body.
+		usage = s.extractClaudeUsage(respBody)
+
+		c.Header("Content-Type", resp.Header.Get("Content-Type"))
+		c.Status(http.StatusOK)
+		_, _ = c.Writer.Write(respBody)
+	}
+
+	// Build the result returned to the caller (model, timing and token usage).
+	duration := time.Since(startTime)
+	log.Printf("%s status=success duration_ms=%d", prefix, duration.Milliseconds())
+
+	return &ForwardResult{
+		Model:        billingModel,
+		Stream:       claudeReq.Stream,
+		Duration:     duration,
+		FirstTokenMs: firstTokenMs,
+		Usage: ClaudeUsage{
+			InputTokens:              usage.InputTokens,
+			OutputTokens:             usage.OutputTokens,
+			CacheReadInputTokens:     usage.CacheReadInputTokens,
+			CacheCreationInputTokens: usage.CacheCreationInputTokens,
+		},
+	}, nil
+}
+
+// streamUpstreamResponse relays the upstream SSE body to the client line by line and collects
+// token usage along the way.
+//
+// Every line read from resp.Body is written to c.Writer followed by "\n" and flushed immediately.
+// Lines starting with "data: " are parsed as JSON; when the event carries a top-level "usage"
+// object, each positive counter in it overwrites the matching field of the returned usage.
+// The second result is the time in milliseconds from startTime to the first non-empty line,
+// or nil when no such line arrived.
+//
+// A scanner failure (read error, or a single line larger than the 1 MiB buffer limit) ends the
+// relay early; it is logged so that a truncated stream is not mistaken for a normal end of stream.
+func (s *AntigravityGatewayService) streamUpstreamResponse(c *gin.Context, resp *http.Response, startTime time.Time) (*ClaudeUsage, *int) {
+	usage := &ClaudeUsage{}
+	var firstTokenMs *int
+
+	scanner := bufio.NewScanner(resp.Body)
+	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+
+	dataPrefix := []byte("data: ")
+
+	for scanner.Scan() {
+		line := scanner.Bytes()
+
+		// Record the time to first token on the first non-empty line only.
+		if firstTokenMs == nil && len(line) > 0 {
+			ms := int(time.Since(startTime).Milliseconds())
+			firstTokenMs = &ms
+		}
+
+		// Pick up usage from any data event that carries it; zero or missing counters
+		// never overwrite a value seen in an earlier event.
+		if bytes.HasPrefix(line, dataPrefix) {
+			var event map[string]any
+			if json.Unmarshal(bytes.TrimPrefix(line, dataPrefix), &event) == nil {
+				if u, ok := event["usage"].(map[string]any); ok {
+					if v, ok := u["input_tokens"].(float64); ok && int(v) > 0 {
+						usage.InputTokens = int(v)
+					}
+					if v, ok := u["output_tokens"].(float64); ok && int(v) > 0 {
+						usage.OutputTokens = int(v)
+					}
+					if v, ok := u["cache_read_input_tokens"].(float64); ok && int(v) > 0 {
+						usage.CacheReadInputTokens = int(v)
+					}
+					if v, ok := u["cache_creation_input_tokens"].(float64); ok && int(v) > 0 {
+						usage.CacheCreationInputTokens = int(v)
+					}
+				}
+			}
+		}
+
+		// Relay the line unchanged. Write errors are ignored on purpose so the upstream
+		// body keeps being read and usage is still collected.
+		_, _ = c.Writer.Write(line)
+		_, _ = c.Writer.Write([]byte("\n"))
+		c.Writer.Flush()
+	}
+
+	// Scan returns false both at EOF and on failure; only Err tells the two apart.
+	if err := scanner.Err(); err != nil {
+		log.Printf("[Antigravity] upstream stream read error: %v", err)
+	}
+
+	return usage, firstTokenMs
+}
+
+// extractClaudeUsage reads the token counters from the top-level "usage" object of a
+// non-streaming Claude response body. Counters that are missing or not JSON numbers stay zero;
+// a body that is not valid JSON yields an all-zero usage. The result is never nil.
+func (s *AntigravityGatewayService) extractClaudeUsage(body []byte) *ClaudeUsage {
+	result := &ClaudeUsage{}
+
+	var decoded map[string]any
+	if err := json.Unmarshal(body, &decoded); err != nil {
+		return result
+	}
+	usageObj, found := decoded["usage"].(map[string]any)
+	if !found {
+		return result
+	}
+
+	// assign copies a numeric counter into dst, leaving dst untouched otherwise.
+	assign := func(key string, dst *int) {
+		if n, isNumber := usageObj[key].(float64); isNumber {
+			*dst = int(n)
+		}
+	}
+	assign("input_tokens", &result.InputTokens)
+	assign("output_tokens", &result.OutputTokens)
+	assign("cache_read_input_tokens", &result.CacheReadInputTokens)
+	assign("cache_creation_input_tokens", &result.CacheCreationInputTokens)
+
+	return result
+}
+
 // ForwardGemini 转发 Gemini 协议请求
 func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Context, account *Account, originalModel string, action string, stream bool, body []byte) (*ForwardResult, error) {
 	startTime := time.Now()
@@ -1287,6 +1694,12 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co
 	}

 	mappedModel := s.getMappedModel(account, originalModel)
+	billingModel := originalModel
+	if antigravityUseMappedModelForBilling() && strings.TrimSpace(mappedModel) != "" {
+		billingModel = mappedModel
+	}
+	afterSwitch := antigravityHasAccountSwitch(ctx)
+	maxRetries := antigravityMaxRetriesForModel(originalModel, afterSwitch)

 	// 获取 access_token
 	if s.tokenProvider == nil {
@@ -1306,8 +1719,15 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co
 		proxyURL = account.Proxy.URL()
 	}

+	// 过滤掉 parts 为空的消息（Gemini API 不接受空 parts）
+	filteredBody, err := filterEmptyPartsFromGeminiRequest(body)
+	if err != nil {
+		log.Printf("[Antigravity] Failed to filter empty parts: %v", err)
+		filteredBody = body
+	}
+
 	// Antigravity 上游要求必须包含身份提示词，注入到请求中
-	injectedBody, err := injectIdentityPatchToGeminiRequest(body)
+	injectedBody, err := injectIdentityPatchToGeminiRequest(filteredBody)
 	if err != nil {
 		return nil, err
 	}
@@ -1344,6 +1764,7 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co
 		httpUpstream:   s.httpUpstream,
 		settingService: s.settingService,
 		handleError:    s.handleUpstreamError,
+		maxRetries:     maxRetries,
 	})
 	if err != nil {
 		return nil, s.writeGoogleError(c, http.StatusBadGateway, "Upstream request failed after retries")
@@ -1493,7 +1914,7 @@ handleSuccess:
 	return &ForwardResult{
 		RequestID:    requestID,
 		Usage:        *usage,
-		Model:        originalModel,
+		Model:        billingModel,
 		Stream:       stream,
 		Duration:     time.Since(startTime),
 		FirstTokenMs: firstTokenMs,
@@ -1544,6 +1965,81 @@ func antigravityUseScopeRateLimit() bool {
 	return true
 }

+// antigravityHasAccountSwitch reports whether ctx carries an int under
+// ctxkey.AccountSwitchCount that is greater than zero. A nil ctx, a missing value,
+// or a value of another type all yield false.
+func antigravityHasAccountSwitch(ctx context.Context) bool {
+	if ctx == nil {
+		return false
+	}
+	switches, isInt := ctx.Value(ctxkey.AccountSwitchCount).(int)
+	return isInt && switches > 0
+}
+
+// antigravityMaxRetries returns the retry count configured in the environment variable named by
+// antigravityMaxRetriesEnv. When the variable is unset, blank, not an integer, or not positive,
+// antigravityDefaultMaxRetries is returned instead.
+func antigravityMaxRetries() int {
+	configured := strings.TrimSpace(os.Getenv(antigravityMaxRetriesEnv))
+	if n, err := strconv.Atoi(configured); err == nil && n > 0 {
+		return n
+	}
+	return antigravityDefaultMaxRetries
+}
+
+// antigravityMaxRetriesAfterSwitch returns the retry count configured in the environment variable
+// named by antigravityMaxRetriesAfterSwitchEnv. When the variable is unset, blank, not an integer,
+// or not positive, it falls back to antigravityMaxRetries().
+func antigravityMaxRetriesAfterSwitch() int {
+	configured := strings.TrimSpace(os.Getenv(antigravityMaxRetriesAfterSwitchEnv))
+	if n, err := strconv.Atoi(configured); err == nil && n > 0 {
+		return n
+	}
+	return antigravityMaxRetries()
+}
+
+// antigravityMaxRetriesForModel returns the retry count to use for the given model.
+// A valid positive value in the model-family environment variable (claude-*, image generation,
+// gemini-*) wins, regardless of afterSwitch. Otherwise the platform-level setting applies:
+// antigravityMaxRetriesAfterSwitch() when afterSwitch is true, antigravityMaxRetries() when not.
+func antigravityMaxRetriesForModel(model string, afterSwitch bool) int {
+	// Pick the family-specific variable; it stays empty for models outside the known families.
+	familyEnv := ""
+	switch {
+	case strings.HasPrefix(model, "claude-"):
+		familyEnv = antigravityMaxRetriesClaudeEnv
+	case isImageGenerationModel(model):
+		familyEnv = antigravityMaxRetriesGeminiImageEnv
+	case strings.HasPrefix(model, "gemini-"):
+		familyEnv = antigravityMaxRetriesGeminiTextEnv
+	}
+
+	if familyEnv != "" {
+		configured := strings.TrimSpace(os.Getenv(familyEnv))
+		if n, err := strconv.Atoi(configured); err == nil && n > 0 {
+			return n
+		}
+	}
+
+	if !afterSwitch {
+		return antigravityMaxRetries()
+	}
+	return antigravityMaxRetriesAfterSwitch()
+}
+
+// antigravityUseMappedModelForBilling reports whether the environment variable named by
+// antigravityBillingModelEnv is set to a truthy value ("1", "true", "yes" or "on",
+// compared case-insensitively after trimming whitespace).
+func antigravityUseMappedModelForBilling() bool {
+	switch strings.ToLower(strings.TrimSpace(os.Getenv(antigravityBillingModelEnv))) {
+	case "1", "true", "yes", "on":
+		return true
+	default:
+		return false
+	}
+}
+
+// antigravityFallbackCooldownSeconds reads a cooldown, in whole seconds, from the environment
+// variable named by antigravityFallbackSecondsEnv. It returns the duration and true when the
+// variable holds a positive integer, and (0, false) when it is unset, blank, or invalid.
+func antigravityFallbackCooldownSeconds() (time.Duration, bool) {
+	configured := strings.TrimSpace(os.Getenv(antigravityFallbackSecondsEnv))
+	secs, err := strconv.Atoi(configured)
+	if err == nil && secs > 0 {
+		return time.Duration(secs) * time.Second, true
+	}
+	return 0, false
+}
 func (s *AntigravityGatewayService) handleUpstreamError(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope) {
 	// 429 使用 Gemini 格式解析（从 body 解析重置时间）
 	if statusCode == 429 {
@@ -1556,6 +2052,9 @@ func (s *AntigravityGatewayService) handleUpstreamError(ctx context.Context, pre
 				fallbackMinutes = s.settingService.cfg.Gateway.AntigravityFallbackCooldownMinutes
 			}
 			defaultDur := time.Duration(fallbackMinutes) * time.Minute
+			if fallbackDur, ok := antigravityFallbackCooldownSeconds(); ok {
+				defaultDur = fallbackDur
+			}
 			ra := time.Now().Add(defaultDur)
 			if useScopeLimit {
 				log.Printf("%s status=429 rate_limited scope=%s reset_in=%v (fallback)", prefix, quotaScope, defaultDur)
@@ -2193,6 +2692,10 @@ func (s *AntigravityGatewayService) writeMappedClaudeError(c *gin.Context, accou
 	return fmt.Errorf("upstream error: %d message=%s", upstreamStatus, upstreamMsg)
 }

+// WriteMappedClaudeError is the exported entry point for writeMappedClaudeError: it forwards
+// all arguments unchanged and returns that method's result.
+func (s *AntigravityGatewayService) WriteMappedClaudeError(c *gin.Context, account *Account, upstreamStatus int, upstreamRequestID string, body []byte) error {
+	return s.writeMappedClaudeError(c, account, upstreamStatus, upstreamRequestID, body)
+}
+
 func (s *AntigravityGatewayService) writeGoogleError(c *gin.Context, status int, message string) error {
 	statusStr := "UNKNOWN"
 	switch status {
@@ -2618,3 +3121,55 @@ func cleanGeminiRequest(body []byte) ([]byte, error) {

 	return json.Marshal(payload)
 }
+
+// filterEmptyPartsFromGeminiRequest drops every entry of the request's "contents" array whose
+// "parts" field is an empty array; per the original author's note, the Gemini API answers such
+// messages with a 400 error.
+//
+// Entries that are not objects, have no "parts" key, or whose "parts" is not an array are kept.
+// When nothing is dropped (or "contents" is absent, empty, or not an array) the input slice is
+// returned as-is; otherwise the payload is re-marshaled. Invalid JSON returns the decode error.
+func filterEmptyPartsFromGeminiRequest(body []byte) ([]byte, error) {
+	var doc map[string]any
+	if err := json.Unmarshal(body, &doc); err != nil {
+		return nil, err
+	}
+
+	items, isArray := doc["contents"].([]any)
+	if !isArray || len(items) == 0 {
+		return body, nil
+	}
+
+	// hasEmptyParts is true only for an object whose "parts" is an array of length zero.
+	hasEmptyParts := func(item any) bool {
+		msg, isObject := item.(map[string]any)
+		if !isObject {
+			return false
+		}
+		parts, isList := msg["parts"].([]any)
+		return isList && len(parts) == 0
+	}
+
+	kept := make([]any, 0, len(items))
+	for _, item := range items {
+		if !hasEmptyParts(item) {
+			kept = append(kept, item)
+		}
+	}
+
+	// Nothing was removed: hand back the caller's bytes untouched.
+	if len(kept) == len(items) {
+		return body, nil
+	}
+
+	doc["contents"] = kept
+	return json.Marshal(doc)
+}
--- a/backend/internal/service/antigravity_gateway_service_test.go
+++ b/backend/internal/service/antigravity_gateway_service_test.go
@@ -1,10 +1,16 @@
 package service

 import (
+	"bytes"
+	"context"
 	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
 	"testing"

 	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
+	"github.com/gin-gonic/gin"
 	"github.com/stretchr/testify/require"
 )

@@ -81,3 +87,106 @@ func TestStripThinkingFromClaudeRequest_DoesNotDowngradeTools(t *testing.T) {
 	require.Equal(t, "secret plan", blocks[0]["text"])
 	require.Equal(t, "tool_use", blocks[1]["type"])
 }
+
+// TestIsPromptTooLongError checks detection for the nested error form, the top-level message
+// form, and an unrelated error message.
+func TestIsPromptTooLongError(t *testing.T) {
+	cases := []struct {
+		body string
+		want bool
+	}{
+		{`{"error":{"message":"Prompt is too long"}}`, true},
+		{`{"message":"Prompt is too long"}`, true},
+		{`{"error":{"message":"other"}}`, false},
+	}
+	for _, tc := range cases {
+		require.Equal(t, tc.want, isPromptTooLongError([]byte(tc.body)), tc.body)
+	}
+}
+
+// httpUpstreamStub is a test double for the gateway's HTTP upstream: both request methods
+// ignore their arguments and return the canned response and error stored on the stub.
+type httpUpstreamStub struct {
+	resp *http.Response
+	err  error
+}
+
+// Do returns the canned response and error.
+func (stub *httpUpstreamStub) Do(_ *http.Request, _ string, _ int64, _ int) (*http.Response, error) {
+	return stub.resp, stub.err
+}
+
+// DoWithTLS returns the canned response and error.
+func (stub *httpUpstreamStub) DoWithTLS(_ *http.Request, _ string, _ int64, _ int, _ bool) (*http.Response, error) {
+	return stub.resp, stub.err
+}
+
+// TestAntigravityGatewayService_Forward_PromptTooLong feeds Forward a stubbed upstream 400 whose
+// message is "Prompt is too long" and expects a *PromptTooLongError carrying the status, request
+// ID and body, plus exactly one recorded ops event of kind "prompt_too_long".
+func TestAntigravityGatewayService_Forward_PromptTooLong(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	payload, err := json.Marshal(map[string]any{
+		"model": "claude-opus-4-5",
+		"messages": []map[string]any{
+			{"role": "user", "content": "hi"},
+		},
+		"max_tokens": 1,
+		"stream":     false,
+	})
+	require.NoError(t, err)
+
+	recorder := httptest.NewRecorder()
+	ginCtx, _ := gin.CreateTestContext(recorder)
+	ginCtx.Request = httptest.NewRequest(http.MethodPost, "/v1/messages", bytes.NewReader(payload))
+
+	// Canned upstream reply: HTTP 400 with a prompt-too-long message and a request ID header.
+	upstreamBody := []byte(`{"error":{"message":"Prompt is too long"}}`)
+	upstreamResp := &http.Response{
+		StatusCode: http.StatusBadRequest,
+		Header:     http.Header{"X-Request-Id": []string{"req-1"}},
+		Body:       io.NopCloser(bytes.NewReader(upstreamBody)),
+	}
+
+	acct := &Account{
+		ID:          1,
+		Name:        "acc-1",
+		Platform:    PlatformAntigravity,
+		Type:        AccountTypeOAuth,
+		Status:      StatusActive,
+		Concurrency: 1,
+		Credentials: map[string]any{
+			"access_token": "token",
+		},
+	}
+
+	gateway := &AntigravityGatewayService{
+		tokenProvider: &AntigravityTokenProvider{},
+		httpUpstream:  &httpUpstreamStub{resp: upstreamResp},
+	}
+
+	result, err := gateway.Forward(context.Background(), ginCtx, acct, payload)
+	require.Nil(t, result)
+
+	var tooLong *PromptTooLongError
+	require.ErrorAs(t, err, &tooLong)
+	require.Equal(t, http.StatusBadRequest, tooLong.StatusCode)
+	require.Equal(t, "req-1", tooLong.RequestID)
+	require.NotEmpty(t, tooLong.Body)
+
+	// The failure must also have been recorded on the gin context as an ops upstream error.
+	stored, found := ginCtx.Get(OpsUpstreamErrorsKey)
+	require.True(t, found)
+	recorded, isEvents := stored.([]*OpsUpstreamErrorEvent)
+	require.True(t, isEvents)
+	require.Len(t, recorded, 1)
+	require.Equal(t, "prompt_too_long", recorded[0].Kind)
+}
+
+// TestAntigravityMaxRetriesForModel_AfterSwitch verifies that, with no model-family override set,
+// the platform value (4) is used before an account switch and the after-switch value (7) after one.
+func TestAntigravityMaxRetriesForModel_AfterSwitch(t *testing.T) {
+	env := []struct{ key, value string }{
+		{antigravityMaxRetriesEnv, "4"},
+		{antigravityMaxRetriesAfterSwitchEnv, "7"},
+		{antigravityMaxRetriesClaudeEnv, ""},
+		{antigravityMaxRetriesGeminiTextEnv, ""},
+		{antigravityMaxRetriesGeminiImageEnv, ""},
+	}
+	for _, e := range env {
+		t.Setenv(e.key, e.value)
+	}
+
+	require.Equal(t, 4, antigravityMaxRetriesForModel("claude-sonnet-4-5", false))
+	require.Equal(t, 7, antigravityMaxRetriesForModel("claude-sonnet-4-5", true))
+}
+
+// TestAntigravityMaxRetriesForModel_AfterSwitchFallback verifies that an unset after-switch
+// value falls back to the platform value (5) even when an account switch has happened.
+func TestAntigravityMaxRetriesForModel_AfterSwitchFallback(t *testing.T) {
+	env := []struct{ key, value string }{
+		{antigravityMaxRetriesEnv, "5"},
+		{antigravityMaxRetriesAfterSwitchEnv, ""},
+		{antigravityMaxRetriesClaudeEnv, ""},
+		{antigravityMaxRetriesGeminiTextEnv, ""},
+		{antigravityMaxRetriesGeminiImageEnv, ""},
+	}
+	for _, e := range env {
+		t.Setenv(e.key, e.value)
+	}
+
+	require.Equal(t, 5, antigravityMaxRetriesForModel("gemini-2.5-flash", true))
+}
--- a/backend/internal/service/antigravity_quota_scope.go
+++ b/backend/internal/service/antigravity_quota_scope.go
@@ -1,6 +1,7 @@
 package service

 import (
+	"slices"
 	"strings"
 	"time"
 )
@@ -16,6 +17,21 @@ const (
 	AntigravityQuotaScopeGeminiImage AntigravityQuotaScope = "gemini_image"
 )

+// IsScopeSupported reports whether scope appears in the group's list of supported scopes.
+// An empty or nil list means nothing has been configured, so every scope counts as supported.
+func IsScopeSupported(supportedScopes []string, scope AntigravityQuotaScope) bool {
+	return len(supportedScopes) == 0 || slices.Contains(supportedScopes, string(scope))
+}
+
+// ResolveAntigravityQuotaScope resolves the quota scope for a model name; it is the exported wrapper that returns resolveAntigravityQuotaScope's result unchanged.
+func ResolveAntigravityQuotaScope(requestedModel string) (AntigravityQuotaScope, bool) {
+	return resolveAntigravityQuotaScope(requestedModel)
+}
+
 // resolveAntigravityQuotaScope 根据模型名称解析配额域
 func resolveAntigravityQuotaScope(requestedModel string) (AntigravityQuotaScope, bool) {
 	model := normalizeAntigravityModelName(requestedModel)
--- a/backend/internal/service/api_key.go
+++ b/backend/internal/service/api_key.go
@@ -2,6 +2,14 @@ package service

 import "time"

+// API Key status constants (presumably the values held by APIKey.Status — confirm). NOTE(review): APIKey.IsActive compares against StatusActive, not StatusAPIKeyActive; verify both are "active".
+const (
+	StatusAPIKeyActive         = "active"
+	StatusAPIKeyDisabled       = "disabled"
+	StatusAPIKeyQuotaExhausted = "quota_exhausted"
+	StatusAPIKeyExpired        = "expired"
+)
+
 type APIKey struct {
 	ID          int64
 	UserID      int64
@@ -15,8 +23,53 @@ type APIKey struct {
 	UpdatedAt   time.Time
 	User        *User
 	Group       *Group
+
+	// Quota fields
+	Quota     float64    // Quota limit in USD (0 = unlimited)
+	QuotaUsed float64    // Used quota amount
+	ExpiresAt *time.Time // Expiration time (nil = never expires)
 }

 func (k *APIKey) IsActive() bool {
 	return k.Status == StatusActive
 }
+
+// IsExpired reports whether the key's expiration time lies in the past.
+// A key without an expiration time (ExpiresAt == nil) never expires.
+func (k *APIKey) IsExpired() bool {
+	return k.ExpiresAt != nil && time.Now().After(*k.ExpiresAt)
+}
+
+// IsQuotaExhausted reports whether the used quota has reached the key's limit.
+// A limit of zero or less means unlimited, so such a key is never exhausted.
+func (k *APIKey) IsQuotaExhausted() bool {
+	return k.Quota > 0 && k.QuotaUsed >= k.Quota
+}
+
+// GetQuotaRemaining returns how much quota is left on the key.
+// It returns -1 when the key is unlimited (limit of zero or less) and never goes below 0
+// when usage has overshot the limit.
+func (k *APIKey) GetQuotaRemaining() float64 {
+	if k.Quota <= 0 {
+		return -1 // unlimited
+	}
+	return max(k.Quota-k.QuotaUsed, 0)
+}
+
+// GetDaysUntilExpiry returns the number of whole 24-hour periods left before the key expires.
+// It returns -1 when the key never expires and 0 once the expiration time has passed.
+func (k *APIKey) GetDaysUntilExpiry() int {
+	if k.ExpiresAt == nil {
+		return -1 // never expires
+	}
+	if left := time.Until(*k.ExpiresAt); left >= 0 {
+		return int(left.Hours() / 24)
+	}
+	return 0
+}
--- a/backend/internal/service/api_key_auth_cache.go
+++ b/backend/internal/service/api_key_auth_cache.go
@@ -1,5 +1,7 @@
 package service

+import "time"
+
 // APIKeyAuthSnapshot API Key 认证缓存快照（仅包含认证所需字段）
 type APIKeyAuthSnapshot struct {
 	APIKeyID    int64                    `json:"api_key_id"`
@@ -10,6 +12,13 @@ type APIKeyAuthSnapshot struct {
 	IPBlacklist []string                 `json:"ip_blacklist,omitempty"`
 	User        APIKeyAuthUserSnapshot   `json:"user"`
 	Group       *APIKeyAuthGroupSnapshot `json:"group,omitempty"`
+
+	// Quota fields for API Key independent quota feature
+	Quota     float64 `json:"quota"`      // Quota limit in USD (0 = unlimited)
+	QuotaUsed float64 `json:"quota_used"` // Used quota amount
+
+	// Expiration field for API Key expiration feature
+	ExpiresAt *time.Time `json:"expires_at,omitempty"` // Expiration time (nil = never expires)
 }

 // APIKeyAuthUserSnapshot 用户快照
@@ -23,25 +32,30 @@ type APIKeyAuthUserSnapshot struct {

 // APIKeyAuthGroupSnapshot 分组快照
 type APIKeyAuthGroupSnapshot struct {
-	ID               int64    `json:"id"`
-	Name             string   `json:"name"`
-	Platform         string   `json:"platform"`
-	Status           string   `json:"status"`
-	SubscriptionType string   `json:"subscription_type"`
-	RateMultiplier   float64  `json:"rate_multiplier"`
-	DailyLimitUSD    *float64 `json:"daily_limit_usd,omitempty"`
-	WeeklyLimitUSD   *float64 `json:"weekly_limit_usd,omitempty"`
-	MonthlyLimitUSD  *float64 `json:"monthly_limit_usd,omitempty"`
-	ImagePrice1K     *float64 `json:"image_price_1k,omitempty"`
-	ImagePrice2K     *float64 `json:"image_price_2k,omitempty"`
-	ImagePrice4K     *float64 `json:"image_price_4k,omitempty"`
-	ClaudeCodeOnly   bool     `json:"claude_code_only"`
-	FallbackGroupID  *int64   `json:"fallback_group_id,omitempty"`
+	ID                              int64    `json:"id"`
+	Name                            string   `json:"name"`
+	Platform                        string   `json:"platform"`
+	Status                          string   `json:"status"`
+	SubscriptionType                string   `json:"subscription_type"`
+	RateMultiplier                  float64  `json:"rate_multiplier"`
+	DailyLimitUSD                   *float64 `json:"daily_limit_usd,omitempty"`
+	WeeklyLimitUSD                  *float64 `json:"weekly_limit_usd,omitempty"`
+	MonthlyLimitUSD                 *float64 `json:"monthly_limit_usd,omitempty"`
+	ImagePrice1K                    *float64 `json:"image_price_1k,omitempty"`
+	ImagePrice2K                    *float64 `json:"image_price_2k,omitempty"`
+	ImagePrice4K                    *float64 `json:"image_price_4k,omitempty"`
+	ClaudeCodeOnly                  bool     `json:"claude_code_only"`
+	FallbackGroupID                 *int64   `json:"fallback_group_id,omitempty"`
+	FallbackGroupIDOnInvalidRequest *int64   `json:"fallback_group_id_on_invalid_request,omitempty"`

 	// Model routing is used by gateway account selection, so it must be part of auth cache snapshot.
 	// Only anthropic groups use these fields; others may leave them empty.
 	ModelRouting        map[string][]int64 `json:"model_routing,omitempty"`
 	ModelRoutingEnabled bool               `json:"model_routing_enabled"`
+	MCPXMLInject        bool               `json:"mcp_xml_inject"`
+
+	// 支持的模型系列（仅 antigravity 平台使用）
+	SupportedModelScopes []string `json:"supported_model_scopes,omitempty"`
 }

 // APIKeyAuthCacheEntry 缓存条目，支持负缓存
--- a/backend/internal/service/api_key_auth_cache_impl.go
+++ b/backend/internal/service/api_key_auth_cache_impl.go
@@ -213,6 +213,9 @@ func (s *APIKeyService) snapshotFromAPIKey(apiKey *APIKey) *APIKeyAuthSnapshot {
 		Status:      apiKey.Status,
 		IPWhitelist: apiKey.IPWhitelist,
 		IPBlacklist: apiKey.IPBlacklist,
+		Quota:       apiKey.Quota,
+		QuotaUsed:   apiKey.QuotaUsed,
+		ExpiresAt:   apiKey.ExpiresAt,
 		User: APIKeyAuthUserSnapshot{
 			ID:          apiKey.User.ID,
 			Status:      apiKey.User.Status,
@@ -223,22 +226,25 @@ func (s *APIKeyService) snapshotFromAPIKey(apiKey *APIKey) *APIKeyAuthSnapshot {
 	}
 	if apiKey.Group != nil {
 		snapshot.Group = &APIKeyAuthGroupSnapshot{
-			ID:                  apiKey.Group.ID,
-			Name:                apiKey.Group.Name,
-			Platform:            apiKey.Group.Platform,
-			Status:              apiKey.Group.Status,
-			SubscriptionType:    apiKey.Group.SubscriptionType,
-			RateMultiplier:      apiKey.Group.RateMultiplier,
-			DailyLimitUSD:       apiKey.Group.DailyLimitUSD,
-			WeeklyLimitUSD:      apiKey.Group.WeeklyLimitUSD,
-			MonthlyLimitUSD:     apiKey.Group.MonthlyLimitUSD,
-			ImagePrice1K:        apiKey.Group.ImagePrice1K,
-			ImagePrice2K:        apiKey.Group.ImagePrice2K,
-			ImagePrice4K:        apiKey.Group.ImagePrice4K,
-			ClaudeCodeOnly:      apiKey.Group.ClaudeCodeOnly,
-			FallbackGroupID:     apiKey.Group.FallbackGroupID,
-			ModelRouting:        apiKey.Group.ModelRouting,
-			ModelRoutingEnabled: apiKey.Group.ModelRoutingEnabled,
+			ID:                              apiKey.Group.ID,
+			Name:                            apiKey.Group.Name,
+			Platform:                        apiKey.Group.Platform,
+			Status:                          apiKey.Group.Status,
+			SubscriptionType:                apiKey.Group.SubscriptionType,
+			RateMultiplier:                  apiKey.Group.RateMultiplier,
+			DailyLimitUSD:                   apiKey.Group.DailyLimitUSD,
+			WeeklyLimitUSD:                  apiKey.Group.WeeklyLimitUSD,
+			MonthlyLimitUSD:                 apiKey.Group.MonthlyLimitUSD,
+			ImagePrice1K:                    apiKey.Group.ImagePrice1K,
+			ImagePrice2K:                    apiKey.Group.ImagePrice2K,
+			ImagePrice4K:                    apiKey.Group.ImagePrice4K,
+			ClaudeCodeOnly:                  apiKey.Group.ClaudeCodeOnly,
+			FallbackGroupID:                 apiKey.Group.FallbackGroupID,
+			FallbackGroupIDOnInvalidRequest: apiKey.Group.FallbackGroupIDOnInvalidRequest,
+			ModelRouting:                    apiKey.Group.ModelRouting,
+			ModelRoutingEnabled:             apiKey.Group.ModelRoutingEnabled,
+			MCPXMLInject:                    apiKey.Group.MCPXMLInject,
+			SupportedModelScopes:            apiKey.Group.SupportedModelScopes,
 		}
 	}
 	return snapshot
@@ -256,6 +262,9 @@ func (s *APIKeyService) snapshotToAPIKey(key string, snapshot *APIKeyAuthSnapsho
 		Status:      snapshot.Status,
 		IPWhitelist: snapshot.IPWhitelist,
 		IPBlacklist: snapshot.IPBlacklist,
+		Quota:       snapshot.Quota,
+		QuotaUsed:   snapshot.QuotaUsed,
+		ExpiresAt:   snapshot.ExpiresAt,
 		User: &User{
 			ID:          snapshot.User.ID,
 			Status:      snapshot.User.Status,
@@ -266,23 +275,26 @@ func (s *APIKeyService) snapshotToAPIKey(key string, snapshot *APIKeyAuthSnapsho
 	}
 	if snapshot.Group != nil {
 		apiKey.Group = &Group{
-			ID:                  snapshot.Group.ID,
-			Name:                snapshot.Group.Name,
-			Platform:            snapshot.Group.Platform,
-			Status:              snapshot.Group.Status,
-			Hydrated:            true,
-			SubscriptionType:    snapshot.Group.SubscriptionType,
-			RateMultiplier:      snapshot.Group.RateMultiplier,
-			DailyLimitUSD:       snapshot.Group.DailyLimitUSD,
-			WeeklyLimitUSD:      snapshot.Group.WeeklyLimitUSD,
-			MonthlyLimitUSD:     snapshot.Group.MonthlyLimitUSD,
-			ImagePrice1K:        snapshot.Group.ImagePrice1K,
-			ImagePrice2K:        snapshot.Group.ImagePrice2K,
-			ImagePrice4K:        snapshot.Group.ImagePrice4K,
-			ClaudeCodeOnly:      snapshot.Group.ClaudeCodeOnly,
-			FallbackGroupID:     snapshot.Group.FallbackGroupID,
-			ModelRouting:        snapshot.Group.ModelRouting,
-			ModelRoutingEnabled: snapshot.Group.ModelRoutingEnabled,
+			ID:                              snapshot.Group.ID,
+			Name:                            snapshot.Group.Name,
+			Platform:                        snapshot.Group.Platform,
+			Status:                          snapshot.Group.Status,
+			Hydrated:                        true,
+			SubscriptionType:                snapshot.Group.SubscriptionType,
+			RateMultiplier:                  snapshot.Group.RateMultiplier,
+			DailyLimitUSD:                   snapshot.Group.DailyLimitUSD,
+			WeeklyLimitUSD:                  snapshot.Group.WeeklyLimitUSD,
+			MonthlyLimitUSD:                 snapshot.Group.MonthlyLimitUSD,
+			ImagePrice1K:                    snapshot.Group.ImagePrice1K,
+			ImagePrice2K:                    snapshot.Group.ImagePrice2K,
+			ImagePrice4K:                    snapshot.Group.ImagePrice4K,
+			ClaudeCodeOnly:                  snapshot.Group.ClaudeCodeOnly,
+			FallbackGroupID:                 snapshot.Group.FallbackGroupID,
+			FallbackGroupIDOnInvalidRequest: snapshot.Group.FallbackGroupIDOnInvalidRequest,
+			ModelRouting:                    snapshot.Group.ModelRouting,
+			ModelRoutingEnabled:             snapshot.Group.ModelRoutingEnabled,
+			MCPXMLInject:                    snapshot.Group.MCPXMLInject,
+			SupportedModelScopes:            snapshot.Group.SupportedModelScopes,
 		}
 	}
 	return apiKey
--- a/backend/internal/service/api_key_service.go
+++ b/backend/internal/service/api_key_service.go
@@ -24,6 +24,10 @@ var (
 	ErrAPIKeyInvalidChars = infraerrors.BadRequest("API_KEY_INVALID_CHARS", "api key can only contain letters, numbers, underscores, and hyphens")
 	ErrAPIKeyRateLimited  = infraerrors.TooManyRequests("API_KEY_RATE_LIMITED", "too many failed attempts, please try again later")
 	ErrInvalidIPPattern   = infraerrors.BadRequest("INVALID_IP_PATTERN", "invalid IP or CIDR pattern")
+	// ErrAPIKeyExpired        = infraerrors.Forbidden("API_KEY_EXPIRED", "api key has expired")
+	ErrAPIKeyExpired = infraerrors.Forbidden("API_KEY_EXPIRED", "api key 已过期")
+	// ErrAPIKeyQuotaExhausted = infraerrors.TooManyRequests("API_KEY_QUOTA_EXHAUSTED", "api key quota exhausted")
+	ErrAPIKeyQuotaExhausted = infraerrors.TooManyRequests("API_KEY_QUOTA_EXHAUSTED", "api key 额度已用完")
 )

 const (
@@ -51,6 +55,9 @@ type APIKeyRepository interface {
 	CountByGroupID(ctx context.Context, groupID int64) (int64, error)
 	ListKeysByUserID(ctx context.Context, userID int64) ([]string, error)
 	ListKeysByGroupID(ctx context.Context, groupID int64) ([]string, error)
+
+	// Quota methods
+	IncrementQuotaUsed(ctx context.Context, id int64, amount float64) (float64, error)
 }

 // APIKeyCache defines cache operations for API key service
@@ -85,6 +92,10 @@ type CreateAPIKeyRequest struct {
 	CustomKey   *string  `json:"custom_key"`   // 可选的自定义key
 	IPWhitelist []string `json:"ip_whitelist"` // IP 白名单
 	IPBlacklist []string `json:"ip_blacklist"` // IP 黑名单
+
+	// Quota fields
+	Quota         float64 `json:"quota"`           // Quota limit in USD (0 = unlimited)
+	ExpiresInDays *int    `json:"expires_in_days"` // Days until expiry (nil = never expires)
 }

 // UpdateAPIKeyRequest 更新API Key请求
@@ -94,6 +105,12 @@ type UpdateAPIKeyRequest struct {
 	Status      *string  `json:"status"`
 	IPWhitelist []string `json:"ip_whitelist"` // IP 白名单（空数组清空）
 	IPBlacklist []string `json:"ip_blacklist"` // IP 黑名单（空数组清空）
+
+	// Quota fields
+	Quota           *float64   `json:"quota"`       // Quota limit in USD (nil = no change, 0 = unlimited)
+	ExpiresAt       *time.Time `json:"expires_at"`  // Expiration time (nil = no change)
+	ClearExpiration bool       `json:"-"`           // Clear expiration (internal use)
+	ResetQuota      *bool      `json:"reset_quota"` // Reset quota_used to 0
 }

 // APIKeyService API Key服务
@@ -289,6 +306,14 @@ func (s *APIKeyService) Create(ctx context.Context, userID int64, req CreateAPIK
 		Status:      StatusActive,
 		IPWhitelist: req.IPWhitelist,
 		IPBlacklist: req.IPBlacklist,
+		Quota:       req.Quota,
+		QuotaUsed:   0,
+	}
+
+	// Set expiration time if specified
+	if req.ExpiresInDays != nil && *req.ExpiresInDays > 0 {
+		expiresAt := time.Now().AddDate(0, 0, *req.ExpiresInDays)
+		apiKey.ExpiresAt = &expiresAt
 	}

 	if err := s.apiKeyRepo.Create(ctx, apiKey); err != nil {
@@ -436,6 +461,35 @@ func (s *APIKeyService) Update(ctx context.Context, id int64, userID int64, req
 		}
 	}

+	// Update quota fields
+	if req.Quota != nil {
+		apiKey.Quota = *req.Quota
+		// If quota is increased and status was quota_exhausted, reactivate
+		if apiKey.Status == StatusAPIKeyQuotaExhausted && *req.Quota > apiKey.QuotaUsed {
+			apiKey.Status = StatusActive
+		}
+	}
+	if req.ResetQuota != nil && *req.ResetQuota {
+		apiKey.QuotaUsed = 0
+		// If resetting quota and status was quota_exhausted, reactivate
+		if apiKey.Status == StatusAPIKeyQuotaExhausted {
+			apiKey.Status = StatusActive
+		}
+	}
+	if req.ClearExpiration {
+		apiKey.ExpiresAt = nil
+		// If clearing expiry and status was expired, reactivate
+		if apiKey.Status == StatusAPIKeyExpired {
+			apiKey.Status = StatusActive
+		}
+	} else if req.ExpiresAt != nil {
+		apiKey.ExpiresAt = req.ExpiresAt
+		// If extending expiry and status was expired, reactivate
+		if apiKey.Status == StatusAPIKeyExpired && time.Now().Before(*req.ExpiresAt) {
+			apiKey.Status = StatusActive
+		}
+	}
+
 	// 更新 IP 限制（空数组会清空设置）
 	apiKey.IPWhitelist = req.IPWhitelist
 	apiKey.IPBlacklist = req.IPBlacklist
@@ -572,3 +626,51 @@ func (s *APIKeyService) SearchAPIKeys(ctx context.Context, userID int64, keyword
 	}
 	return keys, nil
 }
+
+// CheckAPIKeyQuotaAndExpiry reports whether apiKey may still be used for requests.
+// It returns ErrAPIKeyExpired when apiKey.IsExpired() holds, ErrAPIKeyQuotaExhausted
+// when apiKey.IsQuotaExhausted() holds, and nil when neither does.
+func (s *APIKeyService) CheckAPIKeyQuotaAndExpiry(apiKey *APIKey) error {
+	switch {
+	case apiKey.IsExpired():
+		// Expiry is tested first, so it is the error reported when both conditions hold.
+		return ErrAPIKeyExpired
+	case apiKey.IsQuotaExhausted():
+		return ErrAPIKeyQuotaExhausted
+	default:
+		// Neither limit has been hit.
+		return nil
+	}
+}
+
+// UpdateQuotaUsed adds cost to the key's quota_used and, once a positive quota is reached,
+// marks the key StatusAPIKeyQuotaExhausted. Failures after the increment are returned, not hidden.
+func (s *APIKeyService) UpdateQuotaUsed(ctx context.Context, apiKeyID int64, cost float64) error {
+	if cost <= 0 {
+		return nil
+	}
+
+	// The repository performs the increment and hands back the new running total.
+	newQuotaUsed, err := s.apiKeyRepo.IncrementQuotaUsed(ctx, apiKeyID, cost)
+	if err != nil {
+		return fmt.Errorf("increment quota used: %w", err)
+	}
+
+	// Reload the key to learn its limit; usage is already recorded, so only report the failure.
+	apiKey, err := s.apiKeyRepo.GetByID(ctx, apiKeyID)
+	if err != nil {
+		return fmt.Errorf("load api key after quota increment: %w", err)
+	}
+
+	// Quota 0 means unlimited, so only a positive limit can be exhausted.
+	if apiKey.Quota > 0 && newQuotaUsed >= apiKey.Quota {
+		apiKey.Status = StatusAPIKeyQuotaExhausted
+		if err := s.apiKeyRepo.Update(ctx, apiKey); err != nil {
+			return fmt.Errorf("mark api key quota exhausted: %w", err)
+		}
+		// Invalidate cache so next request sees the new status
+		s.InvalidateAuthCacheByKey(ctx, apiKey.Key)
+	}
+
+	return nil
+}
--- a/backend/internal/service/api_key_service_cache_test.go
+++ b/backend/internal/service/api_key_service_cache_test.go
@@ -99,6 +99,10 @@ func (s *authRepoStub) ListKeysByGroupID(ctx context.Context, groupID int64) ([]
 	return s.listKeysByGroupID(ctx, groupID)
 }

+func (s *authRepoStub) IncrementQuotaUsed(ctx context.Context, id int64, amount float64) (float64, error) {
+	panic("unexpected IncrementQuotaUsed call") // test stub: panics so any call to this method is surfaced immediately
+}
+
 type authCacheStub struct {
 	getAuthCache   func(ctx context.Context, key string) (*APIKeyAuthCacheEntry, error)
 	setAuthKeys    []string
--- a/backend/internal/service/api_key_service_delete_test.go
+++ b/backend/internal/service/api_key_service_delete_test.go
@@ -118,6 +118,10 @@ func (s *apiKeyRepoStub) ListKeysByGroupID(ctx context.Context, groupID int64) (
 	panic("unexpected ListKeysByGroupID call")
 }

+func (s *apiKeyRepoStub) IncrementQuotaUsed(ctx context.Context, id int64, amount float64) (float64, error) {
+	panic("unexpected IncrementQuotaUsed call") // test stub: panics so any call to this method is surfaced immediately
+}
+
 // apiKeyCacheStub 是 APIKeyCache 接口的测试桩实现。
 // 用于验证删除操作时缓存清理逻辑是否被正确调用。
 //
--- a/backend/internal/service/auth_service.go
+++ b/backend/internal/service/auth_service.go
@@ -185,7 +185,6 @@ func (s *AuthService) RegisterWithVerification(ctx context.Context, email, passw
 			log.Printf("[Auth] Failed to mark invitation code as used for user %d: %v", user.ID, err)
 		}
 	}
-
 	// 应用优惠码（如果提供且功能已启用）
 	if promoCode != "" && s.promoService != nil && s.settingService != nil && s.settingService.IsPromoCodeEnabled(ctx) {
 		if err := s.promoService.ApplyPromoCode(ctx, user.ID, promoCode); err != nil {
--- a/backend/internal/service/domain_constants.go
+++ b/backend/internal/service/domain_constants.go
@@ -31,6 +31,7 @@ const (
 	AccountTypeOAuth      = domain.AccountTypeOAuth      // OAuth类型账号（full scope: profile + inference）
 	AccountTypeSetupToken = domain.AccountTypeSetupToken // Setup Token类型账号（inference only scope）
 	AccountTypeAPIKey     = domain.AccountTypeAPIKey     // API Key类型账号
+	AccountTypeUpstream   = domain.AccountTypeUpstream   // 上游透传类型账号（通过 Base URL + API Key 连接上游）
 )

 // Redeem type constants
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -257,6 +257,9 @@ var (
 // ErrClaudeCodeOnly 表示分组仅允许 Claude Code 客户端访问
 var ErrClaudeCodeOnly = errors.New("this group only allows Claude Code clients")

+// ErrModelScopeNotSupported indicates that the requested model's scope (model family) is not among those the group supports.
+var ErrModelScopeNotSupported = errors.New("model scope not supported by this group")
+
 // allowedHeaders 白名单headers（参考CRS项目）
 var allowedHeaders = map[string]bool{
 	"accept":                                    true,
@@ -585,12 +588,18 @@ func (s *GatewayService) hashContent(content string) string {
 }

 // replaceModelInBody 替换请求体中的model字段
+// 使用 json.RawMessage 保留其他字段的原始字节，避免 thinking 块等内容被修改
 func (s *GatewayService) replaceModelInBody(body []byte, newModel string) []byte {
-	var req map[string]any
+	var req map[string]json.RawMessage
 	if err := json.Unmarshal(body, &req); err != nil {
 		return body
 	}
-	req["model"] = newModel
+	// 只序列化 model 字段
+	modelBytes, err := json.Marshal(newModel)
+	if err != nil {
+		return body
+	}
+	req["model"] = modelBytes
 	newBody, err := json.Marshal(req)
 	if err != nil {
 		return body
@@ -787,12 +796,21 @@ func normalizeClaudeOAuthRequestBody(body []byte, modelID string, opts claudeOAu
 	if len(body) == 0 {
 		return body, modelID, nil
 	}
+
+	// 使用 json.RawMessage 保留 messages 的原始字节，避免 thinking 块被修改
+	var reqRaw map[string]json.RawMessage
+	if err := json.Unmarshal(body, &reqRaw); err != nil {
+		return body, modelID, nil
+	}
+
+	// 同时解析为 map[string]any 用于修改非 messages 字段
 	var req map[string]any
 	if err := json.Unmarshal(body, &req); err != nil {
 		return body, modelID, nil
 	}

 	toolNameMap := make(map[string]string)
+	modified := false

 	if system, ok := req["system"]; ok {
 		switch v := system.(type) {
@@ -800,6 +818,7 @@ func normalizeClaudeOAuthRequestBody(body []byte, modelID string, opts claudeOAu
 			sanitized := sanitizeSystemText(v)
 			if sanitized != v {
 				req["system"] = sanitized
+				modified = true
 			}
 		case []any:
 			for _, item := range v {
@@ -817,6 +836,7 @@ func normalizeClaudeOAuthRequestBody(body []byte, modelID string, opts claudeOAu
 				sanitized := sanitizeSystemText(text)
 				if sanitized != text {
 					block["text"] = sanitized
+					modified = true
 				}
 			}
 		}
@@ -827,6 +847,7 @@ func normalizeClaudeOAuthRequestBody(body []byte, modelID string, opts claudeOAu
 		if normalized != rawModel {
 			req["model"] = normalized
 			modelID = normalized
+			modified = true
 		}
 	}

@@ -842,16 +863,19 @@ func normalizeClaudeOAuthRequestBody(body []byte, modelID string, opts claudeOAu
 					normalized := normalizeToolNameForClaude(name, toolNameMap)
 					if normalized != "" && normalized != name {
 						toolMap["name"] = normalized
+						modified = true
 					}
 				}
 				if desc, ok := toolMap["description"].(string); ok {
 					sanitized := sanitizeToolDescription(desc)
 					if sanitized != desc {
 						toolMap["description"] = sanitized
+						modified = true
 					}
 				}
 				if schema, ok := toolMap["input_schema"]; ok {
 					normalizeToolInputSchema(schema, toolNameMap)
+					modified = true
 				}
 				tools[idx] = toolMap
 			}
@@ -880,11 +904,15 @@ func normalizeClaudeOAuthRequestBody(body []byte, modelID string, opts claudeOAu
 				normalizedTools[normalized] = value
 			}
 			req["tools"] = normalizedTools
+			modified = true
 		}
 	} else {
 		req["tools"] = []any{}
+		modified = true
 	}

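+	// Rename tool_use blocks in messages. Messages containing thinking blocks are skipped, but their original bytes survive only if no message changes; otherwise the whole body is re-marshaled below.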
+	messagesModified := false
 	if messages, ok := req["messages"].([]any); ok {
 		for _, msg := range messages {
 			msgMap, ok := msg.(map[string]any)
@@ -895,6 +923,24 @@ func normalizeClaudeOAuthRequestBody(body []byte, modelID string, opts claudeOAu
 			if !ok {
 				continue
 			}
+			// 检查此消息是否包含 thinking 块
+			hasThinking := false
+			for _, block := range content {
+				blockMap, ok := block.(map[string]any)
+				if !ok {
+					continue
+				}
+				blockType, _ := blockMap["type"].(string)
+				if blockType == "thinking" || blockType == "redacted_thinking" {
+					hasThinking = true
+					break
+				}
+			}
+			// 如果包含 thinking 块，跳过此消息的修改
+			if hasThinking {
+				continue
+			}
+			// 只修改不包含 thinking 块的消息中的 tool_use
 			for _, block := range content {
 				blockMap, ok := block.(map[string]any)
 				if !ok {
@@ -907,6 +953,7 @@ func normalizeClaudeOAuthRequestBody(body []byte, modelID string, opts claudeOAu
 					normalized := normalizeToolNameForClaude(name, toolNameMap)
 					if normalized != "" && normalized != name {
 						blockMap["name"] = normalized
+						messagesModified = true
 					}
 				}
 			}
@@ -916,6 +963,7 @@ func normalizeClaudeOAuthRequestBody(body []byte, modelID string, opts claudeOAu
 	if opts.stripSystemCacheControl {
 		if system, ok := req["system"]; ok {
 			_ = stripCacheControlFromSystemBlocks(system)
+			modified = true
 		}
 	}

@@ -927,12 +975,46 @@ func normalizeClaudeOAuthRequestBody(body []byte, modelID string, opts claudeOAu
 		}
 		if existing, ok := metadata["user_id"].(string); !ok || existing == "" {
 			metadata["user_id"] = opts.metadataUserID
+			modified = true
 		}
 	}

-	delete(req, "temperature")
-	delete(req, "tool_choice")
+	if _, hasTemp := req["temperature"]; hasTemp {
+		delete(req, "temperature")
+		modified = true
+	}
+	if _, hasChoice := req["tool_choice"]; hasChoice {
+		delete(req, "tool_choice")
+		modified = true
+	}

+	if !modified && !messagesModified {
+		return body, modelID, toolNameMap
+	}
+
+	// 如果 messages 没有被修改，保留原始 messages 字节
+	if !messagesModified {
+		// 序列化非 messages 字段
+		newBody, err := json.Marshal(req)
+		if err != nil {
+			return body, modelID, toolNameMap
+		}
+		// 替换回原始的 messages
+		var newReq map[string]json.RawMessage
+		if err := json.Unmarshal(newBody, &newReq); err != nil {
+			return newBody, modelID, toolNameMap
+		}
+		if origMessages, ok := reqRaw["messages"]; ok {
+			newReq["messages"] = origMessages
+		}
+		finalBody, err := json.Marshal(newReq)
+		if err != nil {
+			return newBody, modelID, toolNameMap
+		}
+		return finalBody, modelID, toolNameMap
+	}
+
+	// messages 被修改了，需要完整序列化
 	newBody, err := json.Marshal(req)
 	if err != nil {
 		return body, modelID, toolNameMap
@@ -1135,6 +1217,13 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
 		log.Printf("[ModelRoutingDebug] load-aware enabled: group_id=%v model=%s session=%s platform=%s", derefGroupID(groupID), requestedModel, shortSessionHash(sessionHash), platform)
 	}

+	// Antigravity 模型系列检查（在账号选择前检查，确保所有代码路径都经过此检查）
+	if platform == PlatformAntigravity && groupID != nil && requestedModel != "" {
+		if err := s.checkAntigravityModelScope(ctx, *groupID, requestedModel); err != nil {
+			return nil, err
+		}
+	}
+
 	accounts, useMixed, err := s.listSchedulableAccounts(ctx, groupID, platform, hasForcePlatform)
 	if err != nil {
 		return nil, err
@@ -1632,6 +1721,10 @@ func (s *GatewayService) resolveGroupByID(ctx context.Context, groupID int64) (*
 	return group, nil
 }

+func (s *GatewayService) ResolveGroupByID(ctx context.Context, groupID int64) (*Group, error) {
+	return s.resolveGroupByID(ctx, groupID) // exported pass-through to resolveGroupByID; adds no logic of its own
+}
+
 func (s *GatewayService) routingAccountIDsForRequest(ctx context.Context, groupID *int64, requestedModel string, platform string) []int64 {
 	if groupID == nil || requestedModel == "" || platform != PlatformAnthropic {
 		return nil
@@ -1697,7 +1790,7 @@ func (s *GatewayService) checkClaudeCodeRestriction(ctx context.Context, groupID
 	}

 	// 强制平台模式不检查 Claude Code 限制
-	if _, hasForcePlatform := ctx.Value(ctxkey.ForcePlatform).(string); hasForcePlatform {
+	if forcePlatform, hasForcePlatform := ctx.Value(ctxkey.ForcePlatform).(string); hasForcePlatform && forcePlatform != "" {
 		return nil, groupID, nil
 	}

@@ -2026,6 +2119,13 @@ func shuffleWithinPriority(accounts []*Account) {

 // selectAccountForModelWithPlatform 选择单平台账户（完全隔离）
 func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, platform string) (*Account, error) {
+	// 对 Antigravity 平台，检查请求的模型系列是否在分组支持范围内
+	if platform == PlatformAntigravity && groupID != nil && requestedModel != "" {
+		if err := s.checkAntigravityModelScope(ctx, *groupID, requestedModel); err != nil {
+			return nil, err
+		}
+	}
+
 	preferOAuth := platform == PlatformGemini
 	routingAccountIDs := s.routingAccountIDsForRequest(ctx, groupID, requestedModel, platform)

@@ -2461,6 +2561,10 @@ func (s *GatewayService) isModelSupportedByAccount(account *Account, requestedMo
 		// Antigravity 平台使用专门的模型支持检查
 		return IsAntigravityModelSupported(requestedModel)
 	}
+	// OAuth/SetupToken 账号使用 Anthropic 标准映射（短ID → 长ID）
+	if account.Platform == PlatformAnthropic && account.Type != AccountTypeAPIKey {
+		requestedModel = claude.NormalizeModelID(requestedModel)
+	}
 	// Gemini API Key 账户直接透传，由上游判断模型是否支持
 	if account.Platform == PlatformGemini && account.Type == AccountTypeAPIKey {
 		return true
@@ -2910,16 +3014,30 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
 	// 强制执行 cache_control 块数量限制（最多 4 个）
 	body = enforceCacheControlLimit(body)

-	// 应用模型映射（仅对apikey类型账号）
+	// 应用模型映射：
+	// - APIKey 账号：使用账号级别的显式映射（如果配置），否则透传原始模型名
+	// - OAuth/SetupToken 账号：使用 Anthropic 标准映射（短ID → 长ID）
+	mappedModel := reqModel
+	mappingSource := ""
 	if account.Type == AccountTypeAPIKey {
-		mappedModel := account.GetMappedModel(reqModel)
+		mappedModel = account.GetMappedModel(reqModel)
 		if mappedModel != reqModel {
-			// 替换请求体中的模型名
-			body = s.replaceModelInBody(body, mappedModel)
-			reqModel = mappedModel
-			log.Printf("Model mapping applied: %s -> %s (account: %s)", originalModel, mappedModel, account.Name)
+			mappingSource = "account"
 		}
 	}
+	if mappingSource == "" && account.Platform == PlatformAnthropic && account.Type != AccountTypeAPIKey {
+		normalized := claude.NormalizeModelID(reqModel)
+		if normalized != reqModel {
+			mappedModel = normalized
+			mappingSource = "prefix"
+		}
+	}
+	if mappedModel != reqModel {
+		// 替换请求体中的模型名
+		body = s.replaceModelInBody(body, mappedModel)
+		reqModel = mappedModel
+		log.Printf("Model mapping applied: %s -> %s (account: %s, source=%s)", originalModel, mappedModel, account.Name, mappingSource)
+	}

 	// 获取凭证
 	token, tokenType, err := s.GetAccessToken(ctx, account)
@@ -3621,6 +3739,13 @@ func (s *GatewayService) isThinkingBlockSignatureError(respBody []byte) bool {
 		return true
 	}

+	// 检测 thinking block 被修改的错误
+	// 例如: "thinking or redacted_thinking blocks in the latest assistant message cannot be modified"
+	if strings.Contains(msg, "cannot be modified") && (strings.Contains(msg, "thinking") || strings.Contains(msg, "redacted_thinking")) {
+		log.Printf("[SignatureCheck] Detected thinking block modification error")
+		return true
+	}
+
 	// 检测空消息内容错误（可能是过滤 thinking blocks 后导致的）
 	// 例如: "all messages must have non-empty content"
 	if strings.Contains(msg, "non-empty content") || strings.Contains(msg, "empty content") {
@@ -4489,13 +4614,19 @@ func (s *GatewayService) replaceToolNamesInResponseBody(body []byte, toolNameMap

 // RecordUsageInput 记录使用量的输入参数
 type RecordUsageInput struct {
-	Result       *ForwardResult
-	APIKey       *APIKey
-	User         *User
-	Account      *Account
-	Subscription *UserSubscription // 可选：订阅信息
-	UserAgent    string            // 请求的 User-Agent
-	IPAddress    string            // 请求的客户端 IP 地址
+	Result        *ForwardResult
+	APIKey        *APIKey
+	User          *User
+	Account       *Account
+	Subscription  *UserSubscription  // optional: subscription info
+	UserAgent     string             // User-Agent of the request
+	IPAddress     string             // client IP address of the request
+	APIKeyService APIKeyQuotaUpdater // optional: used to update the API key's quota usage
+}
+
+// APIKeyQuotaUpdater is the quota-recording dependency accepted by RecordUsageInput.APIKeyService; RecordUsage passes it the billed cost.
+type APIKeyQuotaUpdater interface {
+	UpdateQuotaUsed(ctx context.Context, apiKeyID int64, cost float64) error
+}

 // RecordUsage 记录使用量并扣费（或更新订阅用量）
@@ -4635,6 +4766,13 @@ func (s *GatewayService) RecordUsage(ctx context.Context, input *RecordUsageInpu
 		}
 	}

+	// 更新 API Key 配额（如果设置了配额限制）
+	if shouldBill && cost.ActualCost > 0 && apiKey.Quota > 0 && input.APIKeyService != nil {
+		if err := input.APIKeyService.UpdateQuotaUsed(ctx, apiKey.ID, cost.ActualCost); err != nil {
+			log.Printf("Update API key quota failed: %v", err)
+		}
+	}
+
 	// Schedule batch update for account last_used_at
 	s.deferredService.ScheduleLastUsedUpdate(account.ID)

@@ -4652,6 +4790,7 @@ type RecordUsageLongContextInput struct {
 	IPAddress             string            // 请求的客户端 IP 地址
 	LongContextThreshold  int               // 长上下文阈值（如 200000）
 	LongContextMultiplier float64           // 超出阈值部分的倍率（如 2.0）
+	APIKeyService         *APIKeyService    // API Key 配额服务（可选）
 }

 // RecordUsageWithLongContext 记录使用量并扣费，支持长上下文双倍计费（用于 Gemini）
@@ -4788,6 +4927,12 @@ func (s *GatewayService) RecordUsageWithLongContext(ctx context.Context, input *
 			}
 			// 异步更新余额缓存
 			s.billingCacheService.QueueDeductBalance(user.ID, cost.ActualCost)
+			// API Key 独立配额扣费
+			if input.APIKeyService != nil && apiKey.Quota > 0 {
+				if err := input.APIKeyService.UpdateQuotaUsed(ctx, apiKey.ID, cost.ActualCost); err != nil {
+					log.Printf("Add API key quota used failed: %v", err)
+				}
+			}
 		}
 	}

@@ -4822,16 +4967,30 @@ func (s *GatewayService) ForwardCountTokens(ctx context.Context, c *gin.Context,
 		return nil
 	}

-	// 应用模型映射（仅对 apikey 类型账号）
-	if account.Type == AccountTypeAPIKey {
-		if reqModel != "" {
-			mappedModel := account.GetMappedModel(reqModel)
+	// 应用模型映射：
+	// - APIKey 账号：使用账号级别的显式映射（如果配置），否则透传原始模型名
+	// - OAuth/SetupToken 账号：使用 Anthropic 标准映射（短ID → 长ID）
+	if reqModel != "" {
+		mappedModel := reqModel
+		mappingSource := ""
+		if account.Type == AccountTypeAPIKey {
+			mappedModel = account.GetMappedModel(reqModel)
 			if mappedModel != reqModel {
-				body = s.replaceModelInBody(body, mappedModel)
-				reqModel = mappedModel
-				log.Printf("CountTokens model mapping applied: %s -> %s (account: %s)", parsed.Model, mappedModel, account.Name)
+				mappingSource = "account"
 			}
 		}
+		if mappingSource == "" && account.Platform == PlatformAnthropic && account.Type != AccountTypeAPIKey {
+			normalized := claude.NormalizeModelID(reqModel)
+			if normalized != reqModel {
+				mappedModel = normalized
+				mappingSource = "prefix"
+			}
+		}
+		if mappedModel != reqModel {
+			body = s.replaceModelInBody(body, mappedModel)
+			reqModel = mappedModel
+			log.Printf("CountTokens model mapping applied: %s -> %s (account: %s, source=%s)", parsed.Model, mappedModel, account.Name, mappingSource)
+		}
 	}

 	// 获取凭证
@@ -5083,6 +5242,27 @@ func (s *GatewayService) validateUpstreamBaseURL(raw string) (string, error) {
 	return normalized, nil
 }

+// checkAntigravityModelScope returns ErrModelScopeNotSupported when the quota scope of requestedModel
+// is not accepted by the group's SupportedModelScopes; unresolvable scopes and failed or empty group lookups pass (nil).
+func (s *GatewayService) checkAntigravityModelScope(ctx context.Context, groupID int64, requestedModel string) error {
+	modelScope, resolved := ResolveAntigravityQuotaScope(requestedModel)
+	if !resolved {
+		// No scope could be derived from the model name, so there is nothing to enforce.
+		return nil
+	}
+
+	grp, lookupErr := s.resolveGroupByID(ctx, groupID)
+	if lookupErr != nil || grp == nil {
+		// Lookup failed or the group does not exist: let the request through.
+		return nil
+	}
+
+	if IsScopeSupported(grp.SupportedModelScopes, modelScope) {
+		return nil
+	}
+	return ErrModelScopeNotSupported
+}
+
 // GetAvailableModels returns the list of models available for a group
 // It aggregates model_mapping keys from all schedulable accounts in the group
 func (s *GatewayService) GetAvailableModels(ctx context.Context, groupID *int64, platform string) []string {
--- a/backend/internal/service/gemini_messages_compat_service.go
+++ b/backend/internal/service/gemini_messages_compat_service.go
@@ -977,6 +977,11 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
 		return nil, s.writeGoogleError(c, http.StatusBadRequest, "Request body is empty")
 	}

+	// 过滤掉 parts 为空的消息（Gemini API 不接受空 parts）
+	if filteredBody, err := filterEmptyPartsFromGeminiRequest(body); err == nil {
+		body = filteredBody
+	}
+
 	switch action {
 	case "generateContent", "streamGenerateContent", "countTokens":
 		// ok
--- a/backend/internal/service/gemini_native_signature_cleaner.go
+++ b/backend/internal/service/gemini_native_signature_cleaner.go
@@ -2,20 +2,22 @@ package service

 import (
 	"encoding/json"
+
+	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
 )

-// CleanGeminiNativeThoughtSignatures 从 Gemini 原生 API 请求中移除 thoughtSignature 字段，
+// CleanGeminiNativeThoughtSignatures 从 Gemini 原生 API 请求中替换 thoughtSignature 字段为 dummy 签名，
 // 以避免跨账号签名验证错误。
 //
 // 当粘性会话切换账号时（例如原账号异常、不可调度等），旧账号返回的 thoughtSignature
-// 会导致新账号的签名验证失败。通过移除这些签名，让新账号重新生成有效的签名。
+// 会导致新账号的签名验证失败。通过替换为 dummy 签名，跳过签名验证。
 //
-// CleanGeminiNativeThoughtSignatures removes thoughtSignature fields from Gemini native API requests
-// to avoid cross-account signature validation errors.
+// CleanGeminiNativeThoughtSignatures replaces thoughtSignature fields with dummy signature
+// in Gemini native API requests to avoid cross-account signature validation errors.
 //
 // When sticky session switches accounts (e.g., original account becomes unavailable),
 // thoughtSignatures from the old account will cause validation failures on the new account.
-// By removing these signatures, we allow the new account to generate valid signatures.
+// By replacing with dummy signature, we skip signature validation.
 func CleanGeminiNativeThoughtSignatures(body []byte) []byte {
 	if len(body) == 0 {
 		return body
@@ -28,11 +30,11 @@ func CleanGeminiNativeThoughtSignatures(body []byte) []byte {
 		return body
 	}

-	// 递归清理 thoughtSignature
-	cleaned := cleanThoughtSignaturesRecursive(data)
+	// 递归替换 thoughtSignature 为 dummy 签名
+	replaced := replaceThoughtSignaturesRecursive(data)

 	// 重新序列化
-	result, err := json.Marshal(cleaned)
+	result, err := json.Marshal(replaced)
 	if err != nil {
 		// 如果序列化失败，返回原始 body
 		return body
@@ -41,19 +43,20 @@ func CleanGeminiNativeThoughtSignatures(body []byte) []byte {
 	return result
 }

-// cleanThoughtSignaturesRecursive 递归遍历数据结构，移除所有 thoughtSignature 字段
-func cleanThoughtSignaturesRecursive(data any) any {
+// replaceThoughtSignaturesRecursive 递归遍历数据结构，将所有 thoughtSignature 字段替换为 dummy 签名
+func replaceThoughtSignaturesRecursive(data any) any {
 	switch v := data.(type) {
 	case map[string]any:
-		// 创建新的 map，移除 thoughtSignature
+		// 创建新的 map，替换 thoughtSignature 为 dummy 签名
 		result := make(map[string]any, len(v))
 		for key, value := range v {
-			// 跳过 thoughtSignature 字段
+			// 替换 thoughtSignature 字段为 dummy 签名
 			if key == "thoughtSignature" {
+				result[key] = antigravity.DummyThoughtSignature
 				continue
 			}
 			// 递归处理嵌套结构
-			result[key] = cleanThoughtSignaturesRecursive(value)
+			result[key] = replaceThoughtSignaturesRecursive(value)
 		}
 		return result

@@ -61,7 +64,7 @@ func cleanThoughtSignaturesRecursive(data any) any {
 		// 递归处理数组中的每个元素
 		result := make([]any, len(v))
 		for i, item := range v {
-			result[i] = cleanThoughtSignaturesRecursive(item)
+			result[i] = replaceThoughtSignaturesRecursive(item)
 		}
 		return result

--- a/backend/internal/service/group.go
+++ b/backend/internal/service/group.go
@@ -29,6 +29,8 @@ type Group struct {
 	// Claude Code 客户端限制
 	ClaudeCodeOnly  bool
 	FallbackGroupID *int64
+	// 无效请求兜底分组（仅 anthropic 平台使用）
+	FallbackGroupIDOnInvalidRequest *int64

 	// 模型路由配置
 	// key: 模型匹配模式（支持 * 通配符，如 "claude-opus-*"）
@@ -36,6 +38,13 @@ type Group struct {
 	ModelRouting        map[string][]int64
 	ModelRoutingEnabled bool

+	// MCP XML 协议注入开关（仅 antigravity 平台使用）
+	MCPXMLInject bool
+
+	// 支持的模型系列（仅 antigravity 平台使用）
+	// 可选值: claude, gemini_text, gemini_image
+	SupportedModelScopes []string
+
 	CreatedAt time.Time
 	UpdatedAt time.Time

--- a/backend/internal/service/identity_service.go
+++ b/backend/internal/service/identity_service.go
@@ -169,22 +169,31 @@ func (s *IdentityService) ApplyFingerprint(req *http.Request, fp *Fingerprint) {
 // RewriteUserID 重写body中的metadata.user_id
 // 输入格式：user_{clientId}_account__session_{sessionUUID}
 // 输出格式：user_{cachedClientID}_account_{accountUUID}_session_{newHash}
+//
+// 重要：此函数使用 json.RawMessage 保留其他字段的原始字节，
+// 避免重新序列化导致 thinking 块等内容被修改。
 func (s *IdentityService) RewriteUserID(body []byte, accountID int64, accountUUID, cachedClientID string) ([]byte, error) {
 	if len(body) == 0 || accountUUID == "" || cachedClientID == "" {
 		return body, nil
 	}

-	// 解析JSON
-	var reqMap map[string]any
+	// 使用 RawMessage 保留其他字段的原始字节
+	var reqMap map[string]json.RawMessage
 	if err := json.Unmarshal(body, &reqMap); err != nil {
 		return body, nil
 	}

-	metadata, ok := reqMap["metadata"].(map[string]any)
+	// 解析 metadata 字段
+	metadataRaw, ok := reqMap["metadata"]
 	if !ok {
 		return body, nil
 	}

+	var metadata map[string]any
+	if err := json.Unmarshal(metadataRaw, &metadata); err != nil {
+		return body, nil
+	}
+
 	userID, ok := metadata["user_id"].(string)
 	if !ok || userID == "" {
 		return body, nil
@@ -207,7 +216,13 @@ func (s *IdentityService) RewriteUserID(body []byte, accountID int64, accountUUI
 	newUserID := fmt.Sprintf("user_%s_account_%s_session_%s", cachedClientID, accountUUID, newSessionHash)

 	metadata["user_id"] = newUserID
-	reqMap["metadata"] = metadata
+
+	// 只重新序列化 metadata 字段
+	newMetadataRaw, err := json.Marshal(metadata)
+	if err != nil {
+		return body, nil
+	}
+	reqMap["metadata"] = newMetadataRaw

 	return json.Marshal(reqMap)
 }
@@ -215,6 +230,9 @@ func (s *IdentityService) RewriteUserID(body []byte, accountID int64, accountUUI
 // RewriteUserIDWithMasking 重写body中的metadata.user_id，支持会话ID伪装
 // 如果账号启用了会话ID伪装（session_id_masking_enabled），
 // 则在完成常规重写后，将 session 部分替换为固定的伪装ID（15分钟内保持不变）
+//
+// 重要：此函数使用 json.RawMessage 保留其他字段的原始字节，
+// 避免重新序列化导致 thinking 块等内容被修改。
 func (s *IdentityService) RewriteUserIDWithMasking(ctx context.Context, body []byte, account *Account, accountUUID, cachedClientID string) ([]byte, error) {
 	// 先执行常规的 RewriteUserID 逻辑
 	newBody, err := s.RewriteUserID(body, account.ID, accountUUID, cachedClientID)
@@ -227,17 +245,23 @@ func (s *IdentityService) RewriteUserIDWithMasking(ctx context.Context, body []b
 		return newBody, nil
 	}

-	// 解析重写后的 body，提取 user_id
-	var reqMap map[string]any
+	// 使用 RawMessage 保留其他字段的原始字节
+	var reqMap map[string]json.RawMessage
 	if err := json.Unmarshal(newBody, &reqMap); err != nil {
 		return newBody, nil
 	}

-	metadata, ok := reqMap["metadata"].(map[string]any)
+	// 解析 metadata 字段
+	metadataRaw, ok := reqMap["metadata"]
 	if !ok {
 		return newBody, nil
 	}

+	var metadata map[string]any
+	if err := json.Unmarshal(metadataRaw, &metadata); err != nil {
+		return newBody, nil
+	}
+
 	userID, ok := metadata["user_id"].(string)
 	if !ok || userID == "" {
 		return newBody, nil
@@ -278,7 +302,13 @@ func (s *IdentityService) RewriteUserIDWithMasking(ctx context.Context, body []b
 	)

 	metadata["user_id"] = newUserID
-	reqMap["metadata"] = metadata
+
+	// 只重新序列化 metadata 字段
+	newMetadataRaw, marshalErr := json.Marshal(metadata)
+	if marshalErr != nil {
+		return newBody, nil
+	}
+	reqMap["metadata"] = newMetadataRaw

 	return json.Marshal(reqMap)
 }
--- a/backend/internal/service/openai_codex_transform.go
+++ b/backend/internal/service/openai_codex_transform.go
@@ -72,7 +72,7 @@ type opencodeCacheMetadata struct {
 	LastChecked int64  `json:"lastChecked"`
 }

-func applyCodexOAuthTransform(reqBody map[string]any) codexTransformResult {
+func applyCodexOAuthTransform(reqBody map[string]any, isCodexCLI bool) codexTransformResult {
 	result := codexTransformResult{}
 	// 工具续链需求会影响存储策略与 input 过滤逻辑。
 	needsToolContinuation := NeedsToolContinuation(reqBody)
@@ -118,22 +118,9 @@ func applyCodexOAuthTransform(reqBody map[string]any) codexTransformResult {
 		result.PromptCacheKey = strings.TrimSpace(v)
 	}

-	instructions := strings.TrimSpace(getOpenCodeCodexHeader())
-	existingInstructions, _ := reqBody["instructions"].(string)
-	existingInstructions = strings.TrimSpace(existingInstructions)
-
-	if instructions != "" {
-		if existingInstructions != instructions {
-			reqBody["instructions"] = instructions
-			result.Modified = true
-		}
-	} else if existingInstructions == "" {
-		// 未获取到 opencode 指令时，回退使用 Codex CLI 指令。
-		codexInstructions := strings.TrimSpace(getCodexCLIInstructions())
-		if codexInstructions != "" {
-			reqBody["instructions"] = codexInstructions
-			result.Modified = true
-		}
+	// instructions 处理逻辑：根据是否是 Codex CLI 分别调用不同方法
+	if applyInstructions(reqBody, isCodexCLI) {
+		result.Modified = true
 	}

 	// 续链场景保留 item_reference 与 id，避免 call_id 上下文丢失。
@@ -276,6 +263,72 @@ func GetCodexCLIInstructions() string {
 	return getCodexCLIInstructions()
 }

+// applyInstructions handles the "instructions" field of reqBody and reports whether it was modified.
+// isCodexCLI=true: only fill in missing instructions (using the opencode instructions).
+// isCodexCLI=false: prefer the opencode instructions and overwrite with them.
+func applyInstructions(reqBody map[string]any, isCodexCLI bool) bool {
+	if isCodexCLI {
+		return applyCodexCLIInstructions(reqBody)
+	}
+	return applyOpenCodeInstructions(reqBody)
+}
+
+// applyCodexCLIInstructions fills in missing instructions for Codex CLI requests.
+// The opencode instructions are added only when the existing instructions are empty.
+func applyCodexCLIInstructions(reqBody map[string]any) bool {
+	if !isInstructionsEmpty(reqBody) {
+		return false // valid instructions already present; leave them untouched
+	}
+
+	instructions := strings.TrimSpace(getOpenCodeCodexHeader())
+	if instructions != "" {
+		reqBody["instructions"] = instructions
+		return true
+	}
+
+	return false // opencode header is blank after trimming; nothing to add
+}
+
+// applyOpenCodeInstructions applies the opencode instructions to non-Codex-CLI requests.
+// The opencode instructions take precedence and overwrite any differing existing value.
+func applyOpenCodeInstructions(reqBody map[string]any) bool {
+	instructions := strings.TrimSpace(getOpenCodeCodexHeader())
+	existingInstructions, _ := reqBody["instructions"].(string)
+	existingInstructions = strings.TrimSpace(existingInstructions)
+
+	if instructions != "" {
+		if existingInstructions != instructions {
+			reqBody["instructions"] = instructions
+			return true
+		}
+	} else if existingInstructions == "" { // no opencode instructions and none supplied: fall back to the Codex CLI instructions
+		codexInstructions := strings.TrimSpace(getCodexCLIInstructions())
+		if codexInstructions != "" {
+			reqBody["instructions"] = codexInstructions
+			return true
+		}
+	}
+
+	return false
+}
+
+// isInstructionsEmpty reports whether the "instructions" field is empty.
+// It treats a missing field, nil, an empty string, and a whitespace-only string as empty.
+func isInstructionsEmpty(reqBody map[string]any) bool {
+	val, exists := reqBody["instructions"]
+	if !exists {
+		return true
+	}
+	if val == nil {
+		return true
+	}
+	str, ok := val.(string)
+	if !ok {
+		return true // non-string values are also treated as empty
+	}
+	return strings.TrimSpace(str) == ""
+}
+
 // ReplaceWithCodexInstructions 将请求 instructions 替换为内置 Codex 指令（必要时）。
 func ReplaceWithCodexInstructions(reqBody map[string]any) bool {
 	codexInstructions := strings.TrimSpace(getCodexCLIInstructions())
--- a/backend/internal/service/openai_codex_transform_test.go
+++ b/backend/internal/service/openai_codex_transform_test.go
@@ -23,7 +23,7 @@ func TestApplyCodexOAuthTransform_ToolContinuationPreservesInput(t *testing.T) {
 		"tool_choice": "auto",
 	}

-	applyCodexOAuthTransform(reqBody)
+	applyCodexOAuthTransform(reqBody, false)

 	// 未显式设置 store=true，默认为 false。
 	store, ok := reqBody["store"].(bool)
@@ -59,7 +59,7 @@ func TestApplyCodexOAuthTransform_ExplicitStoreFalsePreserved(t *testing.T) {
 		"tool_choice": "auto",
 	}

-	applyCodexOAuthTransform(reqBody)
+	applyCodexOAuthTransform(reqBody, false)

 	store, ok := reqBody["store"].(bool)
 	require.True(t, ok)
@@ -79,7 +79,7 @@ func TestApplyCodexOAuthTransform_ExplicitStoreTrueForcedFalse(t *testing.T) {
 		"tool_choice": "auto",
 	}

-	applyCodexOAuthTransform(reqBody)
+	applyCodexOAuthTransform(reqBody, false)

 	store, ok := reqBody["store"].(bool)
 	require.True(t, ok)
@@ -97,7 +97,7 @@ func TestApplyCodexOAuthTransform_NonContinuationDefaultsStoreFalseAndStripsIDs(
 		},
 	}

-	applyCodexOAuthTransform(reqBody)
+	applyCodexOAuthTransform(reqBody, false)

 	store, ok := reqBody["store"].(bool)
 	require.True(t, ok)
@@ -148,7 +148,7 @@ func TestApplyCodexOAuthTransform_NormalizeCodexTools_PreservesResponsesFunction
 		},
 	}

-	applyCodexOAuthTransform(reqBody)
+	applyCodexOAuthTransform(reqBody, false)

 	tools, ok := reqBody["tools"].([]any)
 	require.True(t, ok)
@@ -169,7 +169,7 @@ func TestApplyCodexOAuthTransform_EmptyInput(t *testing.T) {
 		"input": []any{},
 	}

-	applyCodexOAuthTransform(reqBody)
+	applyCodexOAuthTransform(reqBody, false)

 	input, ok := reqBody["input"].([]any)
 	require.True(t, ok)
@@ -196,3 +196,77 @@ func setupCodexCache(t *testing.T) {
 	require.NoError(t, err)
 	require.NoError(t, os.WriteFile(filepath.Join(cacheDir, "opencode-codex-header-meta.json"), data, 0o644))
 }
+
+func TestApplyCodexOAuthTransform_CodexCLI_PreservesExistingInstructions(t *testing.T) {
+	// Codex CLI scenario: existing instructions must not be modified.
+	setupCodexCache(t)
+
+	reqBody := map[string]any{
+		"model":        "gpt-5.1",
+		"instructions": "existing instructions",
+	}
+
+	result := applyCodexOAuthTransform(reqBody, true) // isCodexCLI=true
+
+	instructions, ok := reqBody["instructions"].(string)
+	require.True(t, ok)
+	require.Equal(t, "existing instructions", instructions)
+	// Modified may still be true (because other fields were changed), but instructions must stay unchanged.
+	_ = result
+}
+
+func TestApplyCodexOAuthTransform_CodexCLI_SuppliesDefaultWhenEmpty(t *testing.T) {
+	// Codex CLI scenario: a default is supplied when instructions are absent.
+	setupCodexCache(t)
+
+	reqBody := map[string]any{
+		"model": "gpt-5.1",
+		// no instructions field
+	}
+
+	result := applyCodexOAuthTransform(reqBody, true) // isCodexCLI=true
+
+	instructions, ok := reqBody["instructions"].(string)
+	require.True(t, ok)
+	require.NotEmpty(t, instructions)
+	require.True(t, result.Modified)
+}
+
+func TestApplyCodexOAuthTransform_NonCodexCLI_OverridesInstructions(t *testing.T) {
+	// Non-Codex-CLI scenario: instructions are overwritten with the opencode instructions.
+	setupCodexCache(t)
+
+	reqBody := map[string]any{
+		"model":        "gpt-5.1",
+		"instructions": "old instructions",
+	}
+
+	result := applyCodexOAuthTransform(reqBody, false) // isCodexCLI=false
+
+	instructions, ok := reqBody["instructions"].(string)
+	require.True(t, ok)
+	require.NotEqual(t, "old instructions", instructions)
+	require.True(t, result.Modified)
+}
+
+func TestIsInstructionsEmpty(t *testing.T) {
+	tests := []struct { // one row per input shape; expected is the wanted isInstructionsEmpty result
+		name     string
+		reqBody  map[string]any
+		expected bool
+	}{
+		{"missing field", map[string]any{}, true},
+		{"nil value", map[string]any{"instructions": nil}, true},
+		{"empty string", map[string]any{"instructions": ""}, true},
+		{"whitespace only", map[string]any{"instructions": "   "}, true},
+		{"non-string", map[string]any{"instructions": 123}, true},
+		{"valid string", map[string]any{"instructions": "hello"}, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) { // each row runs as its own named subtest
+			result := isInstructionsEmpty(tt.reqBody)
+			require.Equal(t, tt.expected, result)
+		})
+	}
+}
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -796,8 +796,8 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
 		}
 	}

-	if account.Type == AccountTypeOAuth && !isCodexCLI {
-		codexResult := applyCodexOAuthTransform(reqBody)
+	if account.Type == AccountTypeOAuth {
+		codexResult := applyCodexOAuthTransform(reqBody, isCodexCLI)
 		if codexResult.Modified {
 			bodyModified = true
 		}
@@ -1681,13 +1681,14 @@ func (s *OpenAIGatewayService) replaceModelInResponseBody(body []byte, fromModel

 // OpenAIRecordUsageInput input for recording usage
 type OpenAIRecordUsageInput struct {
-	Result       *OpenAIForwardResult
-	APIKey       *APIKey
-	User         *User
-	Account      *Account
-	Subscription *UserSubscription
-	UserAgent    string // 请求的 User-Agent
-	IPAddress    string // 请求的客户端 IP 地址
+	Result        *OpenAIForwardResult
+	APIKey        *APIKey
+	User          *User
+	Account       *Account
+	Subscription  *UserSubscription
+	UserAgent     string // User-Agent of the request
+	IPAddress     string // client IP address of the request
+	APIKeyService APIKeyQuotaUpdater
 }

 // RecordUsage records usage and deducts balance
@@ -1799,6 +1800,13 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
 		}
 	}

+	// Update API key quota if applicable (only for balance mode with quota set)
+	if shouldBill && cost.ActualCost > 0 && apiKey.Quota > 0 && input.APIKeyService != nil {
+		if err := input.APIKeyService.UpdateQuotaUsed(ctx, apiKey.ID, cost.ActualCost); err != nil {
+			log.Printf("Update API key quota failed: %v", err)
+		}
+	}
+
 	// Schedule batch update for account last_used_at
 	s.deferredService.ScheduleLastUsedUpdate(account.ID)

--- a/backend/internal/service/ops_metrics_collector.go
+++ b/backend/internal/service/ops_metrics_collector.go
@@ -285,6 +285,11 @@ func (c *OpsMetricsCollector) collectAndPersist(ctx context.Context) error {
 		return fmt.Errorf("query error counts: %w", err)
 	}

+	accountSwitchCount, err := c.queryAccountSwitchCount(ctx, windowStart, windowEnd)
+	if err != nil {
+		return fmt.Errorf("query account switch counts: %w", err)
+	}
+
 	windowSeconds := windowEnd.Sub(windowStart).Seconds()
 	if windowSeconds <= 0 {
 		windowSeconds = 60
@@ -309,9 +314,10 @@ func (c *OpsMetricsCollector) collectAndPersist(ctx context.Context) error {
 		Upstream429Count:             upstream429,
 		Upstream529Count:             upstream529,

-		TokenConsumed: tokenConsumed,
-		QPS:           float64Ptr(roundTo1DP(qps)),
-		TPS:           float64Ptr(roundTo1DP(tps)),
+		TokenConsumed:      tokenConsumed,
+		AccountSwitchCount: accountSwitchCount,
+		QPS:                float64Ptr(roundTo1DP(qps)),
+		TPS:                float64Ptr(roundTo1DP(tps)),

 		DurationP50Ms: duration.p50,
 		DurationP90Ms: duration.p90,
@@ -551,6 +557,27 @@ WHERE created_at >= $1 AND created_at < $2`
 	return errorTotal, businessLimited, errorSLA, upstreamExcl429529, upstream429, upstream529, nil
 }

+func (c *OpsMetricsCollector) queryAccountSwitchCount(ctx context.Context, start, end time.Time) (int64, error) { // counts failover-kind upstream_errors entries logged in [start, end)
+	q := `
+SELECT
+  COALESCE(SUM(CASE
+    WHEN split_part(ev->>'kind', ':', 1) IN ('failover', 'retry_exhausted_failover', 'failover_on_400') THEN 1
+    ELSE 0
+  END), 0) AS switch_count
+FROM ops_error_logs o
+CROSS JOIN LATERAL jsonb_array_elements(
+  COALESCE(NULLIF(o.upstream_errors, 'null'::jsonb), '[]'::jsonb)
+) AS ev
+WHERE o.created_at >= $1 AND o.created_at < $2
+  AND o.is_count_tokens = FALSE`
+
+	var count int64 // the query's COALESCE(..., 0) yields 0 rather than NULL when nothing matches
+	if err := c.db.QueryRowContext(ctx, q, start, end).Scan(&count); err != nil {
+		return 0, err
+	}
+	return count, nil
+}
+
 type opsCollectedSystemStats struct {
 	cpuUsagePercent    *float64
 	memoryUsedMB       *int64
--- a/backend/internal/service/ops_port.go
+++ b/backend/internal/service/ops_port.go
@@ -161,7 +161,8 @@ type OpsInsertSystemMetricsInput struct {
 	Upstream429Count             int64
 	Upstream529Count             int64

-	TokenConsumed int64
+	TokenConsumed      int64
+	AccountSwitchCount int64

 	QPS *float64
 	TPS *float64
@@ -223,8 +224,9 @@ type OpsSystemMetricsSnapshot struct {
 	DBConnIdle    *int `json:"db_conn_idle"`
 	DBConnWaiting *int `json:"db_conn_waiting"`

-	GoroutineCount        *int `json:"goroutine_count"`
-	ConcurrencyQueueDepth *int `json:"concurrency_queue_depth"`
+	GoroutineCount        *int   `json:"goroutine_count"`
+	ConcurrencyQueueDepth *int   `json:"concurrency_queue_depth"`
+	AccountSwitchCount    *int64 `json:"account_switch_count"`
 }

 type OpsUpsertJobHeartbeatInput struct {
--- a/backend/internal/service/ops_retry.go
+++ b/backend/internal/service/ops_retry.go
@@ -12,6 +12,7 @@ import (
 	"strings"
 	"time"

+	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
 	infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
 	"github.com/gin-gonic/gin"
 	"github.com/lib/pq"
@@ -476,9 +477,13 @@ func (s *OpsService) executeClientRetry(ctx context.Context, reqType opsRetryReq
 			continue
 		}

+		attemptCtx := ctx
+		if switches > 0 {
+			attemptCtx = context.WithValue(attemptCtx, ctxkey.AccountSwitchCount, switches)
+		}
 		exec := func() *opsRetryExecution {
 			defer selection.ReleaseFunc()
-			return s.executeWithAccount(ctx, reqType, errorLog, body, account)
+			return s.executeWithAccount(attemptCtx, reqType, errorLog, body, account)
 		}()

 		if exec != nil {
--- a/backend/internal/service/ops_trend_models.go
+++ b/backend/internal/service/ops_trend_models.go
@@ -6,6 +6,7 @@ type OpsThroughputTrendPoint struct {
 	BucketStart   time.Time `json:"bucket_start"`
 	RequestCount  int64     `json:"request_count"`
 	TokenConsumed int64     `json:"token_consumed"`
+	SwitchCount   int64     `json:"switch_count"`
 	QPS           float64   `json:"qps"`
 	TPS           float64   `json:"tps"`
 }
--- a/backend/internal/service/user_service.go
+++ b/backend/internal/service/user_service.go
@@ -39,7 +39,7 @@ type UserRepository interface {
 	ExistsByEmail(ctx context.Context, email string) (bool, error)
 	RemoveGroupFromAllowedGroups(ctx context.Context, groupID int64) (int64, error)

-	// TOTP 相关方法
+	// TOTP two-factor authentication
 	UpdateTotpSecret(ctx context.Context, userID int64, encryptedSecret *string) error
 	EnableTotp(ctx context.Context, userID int64) error
 	DisableTotp(ctx context.Context, userID int64) error