feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops
Key changes: - Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching - Unified rate limiting: scope-level → model-level with Redis snapshot sync - Load-balanced scheduling by call count with smart retry mechanism - Force cache billing support - Model identity injection in prompts with leak prevention - Thinking mode auto-handling (max_tokens/budget_tokens fix) - Frontend: whitelist mode toggle, model mapping validation, status indicators - Gemini session fallback with Redis Trie O(L) matching - Ops: enhanced concurrency monitoring, account availability, retry logic - Migration scripts: 049-051 for model mapping unification
This commit is contained in:
@@ -35,6 +35,7 @@ type ConcurrencyCache interface {
|
||||
|
||||
// 批量负载查询(只读)
|
||||
GetAccountsLoadBatch(ctx context.Context, accounts []AccountWithConcurrency) (map[int64]*AccountLoadInfo, error)
|
||||
GetUsersLoadBatch(ctx context.Context, users []UserWithConcurrency) (map[int64]*UserLoadInfo, error)
|
||||
|
||||
// 清理过期槽位(后台任务)
|
||||
CleanupExpiredAccountSlots(ctx context.Context, accountID int64) error
|
||||
@@ -77,6 +78,11 @@ type AccountWithConcurrency struct {
|
||||
MaxConcurrency int
|
||||
}
|
||||
|
||||
type UserWithConcurrency struct {
|
||||
ID int64
|
||||
MaxConcurrency int
|
||||
}
|
||||
|
||||
type AccountLoadInfo struct {
|
||||
AccountID int64
|
||||
CurrentConcurrency int
|
||||
@@ -84,6 +90,13 @@ type AccountLoadInfo struct {
|
||||
LoadRate int // 0-100+ (percent)
|
||||
}
|
||||
|
||||
type UserLoadInfo struct {
|
||||
UserID int64
|
||||
CurrentConcurrency int
|
||||
WaitingCount int
|
||||
LoadRate int // 0-100+ (percent)
|
||||
}
|
||||
|
||||
// AcquireAccountSlot attempts to acquire a concurrency slot for an account.
|
||||
// If the account is at max concurrency, it waits until a slot is available or timeout.
|
||||
// Returns a release function that MUST be called when the request completes.
|
||||
@@ -253,6 +266,14 @@ func (s *ConcurrencyService) GetAccountsLoadBatch(ctx context.Context, accounts
|
||||
return s.cache.GetAccountsLoadBatch(ctx, accounts)
|
||||
}
|
||||
|
||||
// GetUsersLoadBatch returns load info for multiple users.
|
||||
func (s *ConcurrencyService) GetUsersLoadBatch(ctx context.Context, users []UserWithConcurrency) (map[int64]*UserLoadInfo, error) {
|
||||
if s.cache == nil {
|
||||
return map[int64]*UserLoadInfo{}, nil
|
||||
}
|
||||
return s.cache.GetUsersLoadBatch(ctx, users)
|
||||
}
|
||||
|
||||
// CleanupExpiredAccountSlots removes expired slots for one account (background task).
|
||||
func (s *ConcurrencyService) CleanupExpiredAccountSlots(ctx context.Context, accountID int64) error {
|
||||
if s.cache == nil {
|
||||
|
||||
Reference in New Issue
Block a user