feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops
Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification
This commit is contained in:
@@ -255,3 +255,142 @@ func (s *OpsService) GetConcurrencyStats(
|
||||
|
||||
return platform, group, account, &collectedAt, nil
|
||||
}
|
||||
|
||||
// listAllActiveUsersForOps returns all active users with their concurrency settings.
|
||||
func (s *OpsService) listAllActiveUsersForOps(ctx context.Context) ([]User, error) {
|
||||
if s == nil || s.userRepo == nil {
|
||||
return []User{}, nil
|
||||
}
|
||||
|
||||
out := make([]User, 0, 128)
|
||||
page := 1
|
||||
for {
|
||||
users, pageInfo, err := s.userRepo.ListWithFilters(ctx, pagination.PaginationParams{
|
||||
Page: page,
|
||||
PageSize: opsAccountsPageSize,
|
||||
}, UserListFilters{
|
||||
Status: StatusActive,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(users) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
out = append(out, users...)
|
||||
if pageInfo != nil && int64(len(out)) >= pageInfo.Total {
|
||||
break
|
||||
}
|
||||
if len(users) < opsAccountsPageSize {
|
||||
break
|
||||
}
|
||||
|
||||
page++
|
||||
if page > 10_000 {
|
||||
log.Printf("[Ops] listAllActiveUsersForOps: aborting after too many pages")
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// getUsersLoadMapBestEffort returns user load info for the given users.
|
||||
func (s *OpsService) getUsersLoadMapBestEffort(ctx context.Context, users []User) map[int64]*UserLoadInfo {
|
||||
if s == nil || s.concurrencyService == nil {
|
||||
return map[int64]*UserLoadInfo{}
|
||||
}
|
||||
if len(users) == 0 {
|
||||
return map[int64]*UserLoadInfo{}
|
||||
}
|
||||
|
||||
// De-duplicate IDs (and keep the max concurrency to avoid under-reporting).
|
||||
unique := make(map[int64]int, len(users))
|
||||
for _, u := range users {
|
||||
if u.ID <= 0 {
|
||||
continue
|
||||
}
|
||||
if prev, ok := unique[u.ID]; !ok || u.Concurrency > prev {
|
||||
unique[u.ID] = u.Concurrency
|
||||
}
|
||||
}
|
||||
|
||||
batch := make([]UserWithConcurrency, 0, len(unique))
|
||||
for id, maxConc := range unique {
|
||||
batch = append(batch, UserWithConcurrency{
|
||||
ID: id,
|
||||
MaxConcurrency: maxConc,
|
||||
})
|
||||
}
|
||||
|
||||
out := make(map[int64]*UserLoadInfo, len(batch))
|
||||
for i := 0; i < len(batch); i += opsConcurrencyBatchChunkSize {
|
||||
end := i + opsConcurrencyBatchChunkSize
|
||||
if end > len(batch) {
|
||||
end = len(batch)
|
||||
}
|
||||
part, err := s.concurrencyService.GetUsersLoadBatch(ctx, batch[i:end])
|
||||
if err != nil {
|
||||
// Best-effort: return zeros rather than failing the ops UI.
|
||||
log.Printf("[Ops] GetUsersLoadBatch failed: %v", err)
|
||||
continue
|
||||
}
|
||||
for k, v := range part {
|
||||
out[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// GetUserConcurrencyStats returns real-time concurrency usage for all active users.
|
||||
func (s *OpsService) GetUserConcurrencyStats(ctx context.Context) (map[int64]*UserConcurrencyInfo, *time.Time, error) {
|
||||
if err := s.RequireMonitoringEnabled(ctx); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
users, err := s.listAllActiveUsersForOps(ctx)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
collectedAt := time.Now()
|
||||
loadMap := s.getUsersLoadMapBestEffort(ctx, users)
|
||||
|
||||
result := make(map[int64]*UserConcurrencyInfo)
|
||||
|
||||
for _, u := range users {
|
||||
if u.ID <= 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
load := loadMap[u.ID]
|
||||
currentInUse := int64(0)
|
||||
waiting := int64(0)
|
||||
if load != nil {
|
||||
currentInUse = int64(load.CurrentConcurrency)
|
||||
waiting = int64(load.WaitingCount)
|
||||
}
|
||||
|
||||
// Skip users with no concurrency activity
|
||||
if currentInUse == 0 && waiting == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
info := &UserConcurrencyInfo{
|
||||
UserID: u.ID,
|
||||
UserEmail: u.Email,
|
||||
Username: u.Username,
|
||||
CurrentInUse: currentInUse,
|
||||
MaxCapacity: int64(u.Concurrency),
|
||||
WaitingInQueue: waiting,
|
||||
}
|
||||
if info.MaxCapacity > 0 {
|
||||
info.LoadPercentage = float64(info.CurrentInUse) / float64(info.MaxCapacity) * 100
|
||||
}
|
||||
result[u.ID] = info
|
||||
}
|
||||
|
||||
return result, &collectedAt, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user