package service import ( "container/heap" "context" "errors" "hash/fnv" "math" "sort" "strconv" "strings" "sync" "sync/atomic" "time" ) const ( openAIAccountScheduleLayerPreviousResponse = "previous_response_id" openAIAccountScheduleLayerSessionSticky = "session_hash" openAIAccountScheduleLayerLoadBalance = "load_balance" ) type OpenAIAccountScheduleRequest struct { GroupID *int64 SessionHash string StickyAccountID int64 PreviousResponseID string RequestedModel string RequiredTransport OpenAIUpstreamTransport ExcludedIDs map[int64]struct{} } type OpenAIAccountScheduleDecision struct { Layer string StickyPreviousHit bool StickySessionHit bool CandidateCount int TopK int LatencyMs int64 LoadSkew float64 SelectedAccountID int64 SelectedAccountType string } type OpenAIAccountSchedulerMetricsSnapshot struct { SelectTotal int64 StickyPreviousHitTotal int64 StickySessionHitTotal int64 LoadBalanceSelectTotal int64 AccountSwitchTotal int64 SchedulerLatencyMsTotal int64 SchedulerLatencyMsAvg float64 StickyHitRatio float64 AccountSwitchRate float64 LoadSkewAvg float64 RuntimeStatsAccountCount int } type OpenAIAccountScheduler interface { Select(ctx context.Context, req OpenAIAccountScheduleRequest) (*AccountSelectionResult, OpenAIAccountScheduleDecision, error) ReportResult(accountID int64, success bool, firstTokenMs *int) ReportSwitch() SnapshotMetrics() OpenAIAccountSchedulerMetricsSnapshot } type openAIAccountSchedulerMetrics struct { selectTotal atomic.Int64 stickyPreviousHitTotal atomic.Int64 stickySessionHitTotal atomic.Int64 loadBalanceSelectTotal atomic.Int64 accountSwitchTotal atomic.Int64 latencyMsTotal atomic.Int64 loadSkewMilliTotal atomic.Int64 } func (m *openAIAccountSchedulerMetrics) recordSelect(decision OpenAIAccountScheduleDecision) { if m == nil { return } m.selectTotal.Add(1) m.latencyMsTotal.Add(decision.LatencyMs) m.loadSkewMilliTotal.Add(int64(math.Round(decision.LoadSkew * 1000))) if decision.StickyPreviousHit { m.stickyPreviousHitTotal.Add(1) } if decision.StickySessionHit { m.stickySessionHitTotal.Add(1) } if decision.Layer == openAIAccountScheduleLayerLoadBalance { m.loadBalanceSelectTotal.Add(1) } } func (m *openAIAccountSchedulerMetrics) recordSwitch() { if m == nil { return } m.accountSwitchTotal.Add(1) } type openAIAccountRuntimeStats struct { accounts sync.Map accountCount atomic.Int64 } type openAIAccountRuntimeStat struct { errorRateEWMABits atomic.Uint64 ttftEWMABits atomic.Uint64 } func newOpenAIAccountRuntimeStats() *openAIAccountRuntimeStats { return &openAIAccountRuntimeStats{} } func (s *openAIAccountRuntimeStats) loadOrCreate(accountID int64) *openAIAccountRuntimeStat { if value, ok := s.accounts.Load(accountID); ok { stat, _ := value.(*openAIAccountRuntimeStat) if stat != nil { return stat } } stat := &openAIAccountRuntimeStat{} stat.ttftEWMABits.Store(math.Float64bits(math.NaN())) actual, loaded := s.accounts.LoadOrStore(accountID, stat) if !loaded { s.accountCount.Add(1) return stat } existing, _ := actual.(*openAIAccountRuntimeStat) if existing != nil { return existing } return stat } func updateEWMAAtomic(target *atomic.Uint64, sample float64, alpha float64) { for { oldBits := target.Load() oldValue := math.Float64frombits(oldBits) newValue := alpha*sample + (1-alpha)*oldValue if target.CompareAndSwap(oldBits, math.Float64bits(newValue)) { return } } } func (s *openAIAccountRuntimeStats) report(accountID int64, success bool, firstTokenMs *int) { if s == nil || accountID <= 0 { return } const alpha = 0.2 stat := s.loadOrCreate(accountID) errorSample := 1.0 if success { errorSample = 0.0 } updateEWMAAtomic(&stat.errorRateEWMABits, errorSample, alpha) if firstTokenMs != nil && *firstTokenMs > 0 { ttft := float64(*firstTokenMs) ttftBits := math.Float64bits(ttft) for { oldBits := stat.ttftEWMABits.Load() oldValue := math.Float64frombits(oldBits) if math.IsNaN(oldValue) { if stat.ttftEWMABits.CompareAndSwap(oldBits, ttftBits) { break } continue } newValue := alpha*ttft + (1-alpha)*oldValue if stat.ttftEWMABits.CompareAndSwap(oldBits, math.Float64bits(newValue)) { break } } } } func (s *openAIAccountRuntimeStats) snapshot(accountID int64) (errorRate float64, ttft float64, hasTTFT bool) { if s == nil || accountID <= 0 { return 0, 0, false } value, ok := s.accounts.Load(accountID) if !ok { return 0, 0, false } stat, _ := value.(*openAIAccountRuntimeStat) if stat == nil { return 0, 0, false } errorRate = clamp01(math.Float64frombits(stat.errorRateEWMABits.Load())) ttftValue := math.Float64frombits(stat.ttftEWMABits.Load()) if math.IsNaN(ttftValue) { return errorRate, 0, false } return errorRate, ttftValue, true } func (s *openAIAccountRuntimeStats) size() int { if s == nil { return 0 } return int(s.accountCount.Load()) } type defaultOpenAIAccountScheduler struct { service *OpenAIGatewayService metrics openAIAccountSchedulerMetrics stats *openAIAccountRuntimeStats } func newDefaultOpenAIAccountScheduler(service *OpenAIGatewayService, stats *openAIAccountRuntimeStats) OpenAIAccountScheduler { if stats == nil { stats = newOpenAIAccountRuntimeStats() } return &defaultOpenAIAccountScheduler{ service: service, stats: stats, } } func (s *defaultOpenAIAccountScheduler) Select( ctx context.Context, req OpenAIAccountScheduleRequest, ) (*AccountSelectionResult, OpenAIAccountScheduleDecision, error) { decision := OpenAIAccountScheduleDecision{} start := time.Now() defer func() { decision.LatencyMs = time.Since(start).Milliseconds() s.metrics.recordSelect(decision) }() previousResponseID := strings.TrimSpace(req.PreviousResponseID) if previousResponseID != "" { selection, err := s.service.SelectAccountByPreviousResponseID( ctx, req.GroupID, previousResponseID, req.RequestedModel, req.ExcludedIDs, ) if err != nil { return nil, decision, err } if selection != nil && selection.Account != nil { if !s.isAccountTransportCompatible(selection.Account, req.RequiredTransport) { selection = nil } } if selection != nil && selection.Account != nil { decision.Layer = openAIAccountScheduleLayerPreviousResponse decision.StickyPreviousHit = true decision.SelectedAccountID = selection.Account.ID decision.SelectedAccountType = selection.Account.Type if req.SessionHash != "" { _ = s.service.BindStickySession(ctx, req.GroupID, req.SessionHash, selection.Account.ID) } return selection, decision, nil } } selection, err := s.selectBySessionHash(ctx, req) if err != nil { return nil, decision, err } if selection != nil && selection.Account != nil { decision.Layer = openAIAccountScheduleLayerSessionSticky decision.StickySessionHit = true decision.SelectedAccountID = selection.Account.ID decision.SelectedAccountType = selection.Account.Type return selection, decision, nil } selection, candidateCount, topK, loadSkew, err := s.selectByLoadBalance(ctx, req) decision.Layer = openAIAccountScheduleLayerLoadBalance decision.CandidateCount = candidateCount decision.TopK = topK decision.LoadSkew = loadSkew if err != nil { return nil, decision, err } if selection != nil && selection.Account != nil { decision.SelectedAccountID = selection.Account.ID decision.SelectedAccountType = selection.Account.Type } return selection, decision, nil } func (s *defaultOpenAIAccountScheduler) selectBySessionHash( ctx context.Context, req OpenAIAccountScheduleRequest, ) (*AccountSelectionResult, error) { sessionHash := strings.TrimSpace(req.SessionHash) if sessionHash == "" || s == nil || s.service == nil || s.service.cache == nil { return nil, nil } accountID := req.StickyAccountID if accountID <= 0 { var err error accountID, err = s.service.getStickySessionAccountID(ctx, req.GroupID, sessionHash) if err != nil || accountID <= 0 { return nil, nil } } if accountID <= 0 { return nil, nil } if req.ExcludedIDs != nil { if _, excluded := req.ExcludedIDs[accountID]; excluded { return nil, nil } } account, err := s.service.getSchedulableAccount(ctx, accountID) if err != nil || account == nil { _ = s.service.deleteStickySessionAccountID(ctx, req.GroupID, sessionHash) return nil, nil } if shouldClearStickySession(account, req.RequestedModel) || !account.IsOpenAI() { _ = s.service.deleteStickySessionAccountID(ctx, req.GroupID, sessionHash) return nil, nil } if req.RequestedModel != "" && !account.IsModelSupported(req.RequestedModel) { return nil, nil } if !s.isAccountTransportCompatible(account, req.RequiredTransport) { _ = s.service.deleteStickySessionAccountID(ctx, req.GroupID, sessionHash) return nil, nil } result, acquireErr := s.service.tryAcquireAccountSlot(ctx, accountID, account.Concurrency) if acquireErr == nil && result.Acquired { _ = s.service.refreshStickySessionTTL(ctx, req.GroupID, sessionHash, s.service.openAIWSSessionStickyTTL()) return &AccountSelectionResult{ Account: account, Acquired: true, ReleaseFunc: result.ReleaseFunc, }, nil } cfg := s.service.schedulingConfig() if s.service.concurrencyService != nil { return &AccountSelectionResult{ Account: account, WaitPlan: &AccountWaitPlan{ AccountID: accountID, MaxConcurrency: account.Concurrency, Timeout: cfg.StickySessionWaitTimeout, MaxWaiting: cfg.StickySessionMaxWaiting, }, }, nil } return nil, nil } type openAIAccountCandidateScore struct { account *Account loadInfo *AccountLoadInfo score float64 errorRate float64 ttft float64 hasTTFT bool } type openAIAccountCandidateHeap []openAIAccountCandidateScore func (h openAIAccountCandidateHeap) Len() int { return len(h) } func (h openAIAccountCandidateHeap) Less(i, j int) bool { // 最小堆根节点保存“最差”候选,便于 O(log k) 维护 topK。 return isOpenAIAccountCandidateBetter(h[j], h[i]) } func (h openAIAccountCandidateHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } func (h *openAIAccountCandidateHeap) Push(x any) { candidate, ok := x.(openAIAccountCandidateScore) if !ok { panic("openAIAccountCandidateHeap: invalid element type") } *h = append(*h, candidate) } func (h *openAIAccountCandidateHeap) Pop() any { old := *h n := len(old) last := old[n-1] *h = old[:n-1] return last } func isOpenAIAccountCandidateBetter(left openAIAccountCandidateScore, right openAIAccountCandidateScore) bool { if left.score != right.score { return left.score > right.score } if left.account.Priority != right.account.Priority { return left.account.Priority < right.account.Priority } if left.loadInfo.LoadRate != right.loadInfo.LoadRate { return left.loadInfo.LoadRate < right.loadInfo.LoadRate } if left.loadInfo.WaitingCount != right.loadInfo.WaitingCount { return left.loadInfo.WaitingCount < right.loadInfo.WaitingCount } return left.account.ID < right.account.ID } func selectTopKOpenAICandidates(candidates []openAIAccountCandidateScore, topK int) []openAIAccountCandidateScore { if len(candidates) == 0 { return nil } if topK <= 0 { topK = 1 } if topK >= len(candidates) { ranked := append([]openAIAccountCandidateScore(nil), candidates...) sort.Slice(ranked, func(i, j int) bool { return isOpenAIAccountCandidateBetter(ranked[i], ranked[j]) }) return ranked } best := make(openAIAccountCandidateHeap, 0, topK) for _, candidate := range candidates { if len(best) < topK { heap.Push(&best, candidate) continue } if isOpenAIAccountCandidateBetter(candidate, best[0]) { best[0] = candidate heap.Fix(&best, 0) } } ranked := make([]openAIAccountCandidateScore, len(best)) copy(ranked, best) sort.Slice(ranked, func(i, j int) bool { return isOpenAIAccountCandidateBetter(ranked[i], ranked[j]) }) return ranked } type openAISelectionRNG struct { state uint64 } func newOpenAISelectionRNG(seed uint64) openAISelectionRNG { if seed == 0 { seed = 0x9e3779b97f4a7c15 } return openAISelectionRNG{state: seed} } func (r *openAISelectionRNG) nextUint64() uint64 { // xorshift64* x := r.state x ^= x >> 12 x ^= x << 25 x ^= x >> 27 r.state = x return x * 2685821657736338717 } func (r *openAISelectionRNG) nextFloat64() float64 { // [0,1) return float64(r.nextUint64()>>11) / (1 << 53) } func deriveOpenAISelectionSeed(req OpenAIAccountScheduleRequest) uint64 { hasher := fnv.New64a() writeValue := func(value string) { trimmed := strings.TrimSpace(value) if trimmed == "" { return } _, _ = hasher.Write([]byte(trimmed)) _, _ = hasher.Write([]byte{0}) } writeValue(req.SessionHash) writeValue(req.PreviousResponseID) writeValue(req.RequestedModel) if req.GroupID != nil { _, _ = hasher.Write([]byte(strconv.FormatInt(*req.GroupID, 10))) } seed := hasher.Sum64() // 对“无会话锚点”的纯负载均衡请求引入时间熵,避免固定命中同一账号。 if strings.TrimSpace(req.SessionHash) == "" && strings.TrimSpace(req.PreviousResponseID) == "" { seed ^= uint64(time.Now().UnixNano()) } if seed == 0 { seed = uint64(time.Now().UnixNano()) ^ 0x9e3779b97f4a7c15 } return seed } func buildOpenAIWeightedSelectionOrder( candidates []openAIAccountCandidateScore, req OpenAIAccountScheduleRequest, ) []openAIAccountCandidateScore { if len(candidates) <= 1 { return append([]openAIAccountCandidateScore(nil), candidates...) } pool := append([]openAIAccountCandidateScore(nil), candidates...) weights := make([]float64, len(pool)) minScore := pool[0].score for i := 1; i < len(pool); i++ { if pool[i].score < minScore { minScore = pool[i].score } } for i := range pool { // 将 top-K 分值平移到正区间,避免“单一最高分账号”长期垄断。 weight := (pool[i].score - minScore) + 1.0 if math.IsNaN(weight) || math.IsInf(weight, 0) || weight <= 0 { weight = 1.0 } weights[i] = weight } order := make([]openAIAccountCandidateScore, 0, len(pool)) rng := newOpenAISelectionRNG(deriveOpenAISelectionSeed(req)) for len(pool) > 0 { total := 0.0 for _, w := range weights { total += w } selectedIdx := 0 if total > 0 { r := rng.nextFloat64() * total acc := 0.0 for i, w := range weights { acc += w if r <= acc { selectedIdx = i break } } } else { selectedIdx = int(rng.nextUint64() % uint64(len(pool))) } order = append(order, pool[selectedIdx]) pool = append(pool[:selectedIdx], pool[selectedIdx+1:]...) weights = append(weights[:selectedIdx], weights[selectedIdx+1:]...) } return order } func (s *defaultOpenAIAccountScheduler) selectByLoadBalance( ctx context.Context, req OpenAIAccountScheduleRequest, ) (*AccountSelectionResult, int, int, float64, error) { accounts, err := s.service.listSchedulableAccounts(ctx, req.GroupID) if err != nil { return nil, 0, 0, 0, err } if len(accounts) == 0 { return nil, 0, 0, 0, errors.New("no available OpenAI accounts") } filtered := make([]*Account, 0, len(accounts)) loadReq := make([]AccountWithConcurrency, 0, len(accounts)) for i := range accounts { account := &accounts[i] if req.ExcludedIDs != nil { if _, excluded := req.ExcludedIDs[account.ID]; excluded { continue } } if !account.IsSchedulable() || !account.IsOpenAI() { continue } if req.RequestedModel != "" && !account.IsModelSupported(req.RequestedModel) { continue } if !s.isAccountTransportCompatible(account, req.RequiredTransport) { continue } filtered = append(filtered, account) loadReq = append(loadReq, AccountWithConcurrency{ ID: account.ID, MaxConcurrency: account.Concurrency, }) } if len(filtered) == 0 { return nil, 0, 0, 0, errors.New("no available OpenAI accounts") } loadMap := map[int64]*AccountLoadInfo{} if s.service.concurrencyService != nil { if batchLoad, loadErr := s.service.concurrencyService.GetAccountsLoadBatch(ctx, loadReq); loadErr == nil { loadMap = batchLoad } } minPriority, maxPriority := filtered[0].Priority, filtered[0].Priority maxWaiting := 1 loadRateSum := 0.0 loadRateSumSquares := 0.0 minTTFT, maxTTFT := 0.0, 0.0 hasTTFTSample := false candidates := make([]openAIAccountCandidateScore, 0, len(filtered)) for _, account := range filtered { loadInfo := loadMap[account.ID] if loadInfo == nil { loadInfo = &AccountLoadInfo{AccountID: account.ID} } if account.Priority < minPriority { minPriority = account.Priority } if account.Priority > maxPriority { maxPriority = account.Priority } if loadInfo.WaitingCount > maxWaiting { maxWaiting = loadInfo.WaitingCount } errorRate, ttft, hasTTFT := s.stats.snapshot(account.ID) if hasTTFT && ttft > 0 { if !hasTTFTSample { minTTFT, maxTTFT = ttft, ttft hasTTFTSample = true } else { if ttft < minTTFT { minTTFT = ttft } if ttft > maxTTFT { maxTTFT = ttft } } } loadRate := float64(loadInfo.LoadRate) loadRateSum += loadRate loadRateSumSquares += loadRate * loadRate candidates = append(candidates, openAIAccountCandidateScore{ account: account, loadInfo: loadInfo, errorRate: errorRate, ttft: ttft, hasTTFT: hasTTFT, }) } loadSkew := calcLoadSkewByMoments(loadRateSum, loadRateSumSquares, len(candidates)) weights := s.service.openAIWSSchedulerWeights() for i := range candidates { item := &candidates[i] priorityFactor := 1.0 if maxPriority > minPriority { priorityFactor = 1 - float64(item.account.Priority-minPriority)/float64(maxPriority-minPriority) } loadFactor := 1 - clamp01(float64(item.loadInfo.LoadRate)/100.0) queueFactor := 1 - clamp01(float64(item.loadInfo.WaitingCount)/float64(maxWaiting)) errorFactor := 1 - clamp01(item.errorRate) ttftFactor := 0.5 if item.hasTTFT && hasTTFTSample && maxTTFT > minTTFT { ttftFactor = 1 - clamp01((item.ttft-minTTFT)/(maxTTFT-minTTFT)) } item.score = weights.Priority*priorityFactor + weights.Load*loadFactor + weights.Queue*queueFactor + weights.ErrorRate*errorFactor + weights.TTFT*ttftFactor } topK := s.service.openAIWSLBTopK() if topK > len(candidates) { topK = len(candidates) } if topK <= 0 { topK = 1 } rankedCandidates := selectTopKOpenAICandidates(candidates, topK) selectionOrder := buildOpenAIWeightedSelectionOrder(rankedCandidates, req) for i := 0; i < len(selectionOrder); i++ { candidate := selectionOrder[i] result, acquireErr := s.service.tryAcquireAccountSlot(ctx, candidate.account.ID, candidate.account.Concurrency) if acquireErr != nil { return nil, len(candidates), topK, loadSkew, acquireErr } if result != nil && result.Acquired { if req.SessionHash != "" { _ = s.service.BindStickySession(ctx, req.GroupID, req.SessionHash, candidate.account.ID) } return &AccountSelectionResult{ Account: candidate.account, Acquired: true, ReleaseFunc: result.ReleaseFunc, }, len(candidates), topK, loadSkew, nil } } cfg := s.service.schedulingConfig() candidate := selectionOrder[0] return &AccountSelectionResult{ Account: candidate.account, WaitPlan: &AccountWaitPlan{ AccountID: candidate.account.ID, MaxConcurrency: candidate.account.Concurrency, Timeout: cfg.FallbackWaitTimeout, MaxWaiting: cfg.FallbackMaxWaiting, }, }, len(candidates), topK, loadSkew, nil } func (s *defaultOpenAIAccountScheduler) isAccountTransportCompatible(account *Account, requiredTransport OpenAIUpstreamTransport) bool { // HTTP 入站可回退到 HTTP 线路,不需要在账号选择阶段做传输协议强过滤。 if requiredTransport == OpenAIUpstreamTransportAny || requiredTransport == OpenAIUpstreamTransportHTTPSSE { return true } if s == nil || s.service == nil || account == nil { return false } return s.service.getOpenAIWSProtocolResolver().Resolve(account).Transport == requiredTransport } func (s *defaultOpenAIAccountScheduler) ReportResult(accountID int64, success bool, firstTokenMs *int) { if s == nil || s.stats == nil { return } s.stats.report(accountID, success, firstTokenMs) } func (s *defaultOpenAIAccountScheduler) ReportSwitch() { if s == nil { return } s.metrics.recordSwitch() } func (s *defaultOpenAIAccountScheduler) SnapshotMetrics() OpenAIAccountSchedulerMetricsSnapshot { if s == nil { return OpenAIAccountSchedulerMetricsSnapshot{} } selectTotal := s.metrics.selectTotal.Load() prevHit := s.metrics.stickyPreviousHitTotal.Load() sessionHit := s.metrics.stickySessionHitTotal.Load() switchTotal := s.metrics.accountSwitchTotal.Load() latencyTotal := s.metrics.latencyMsTotal.Load() loadSkewTotal := s.metrics.loadSkewMilliTotal.Load() snapshot := OpenAIAccountSchedulerMetricsSnapshot{ SelectTotal: selectTotal, StickyPreviousHitTotal: prevHit, StickySessionHitTotal: sessionHit, LoadBalanceSelectTotal: s.metrics.loadBalanceSelectTotal.Load(), AccountSwitchTotal: switchTotal, SchedulerLatencyMsTotal: latencyTotal, RuntimeStatsAccountCount: s.stats.size(), } if selectTotal > 0 { snapshot.SchedulerLatencyMsAvg = float64(latencyTotal) / float64(selectTotal) snapshot.StickyHitRatio = float64(prevHit+sessionHit) / float64(selectTotal) snapshot.AccountSwitchRate = float64(switchTotal) / float64(selectTotal) snapshot.LoadSkewAvg = float64(loadSkewTotal) / 1000 / float64(selectTotal) } return snapshot } func (s *OpenAIGatewayService) getOpenAIAccountScheduler() OpenAIAccountScheduler { if s == nil { return nil } s.openaiSchedulerOnce.Do(func() { if s.openaiAccountStats == nil { s.openaiAccountStats = newOpenAIAccountRuntimeStats() } if s.openaiScheduler == nil { s.openaiScheduler = newDefaultOpenAIAccountScheduler(s, s.openaiAccountStats) } }) return s.openaiScheduler } func (s *OpenAIGatewayService) SelectAccountWithScheduler( ctx context.Context, groupID *int64, previousResponseID string, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, requiredTransport OpenAIUpstreamTransport, ) (*AccountSelectionResult, OpenAIAccountScheduleDecision, error) { decision := OpenAIAccountScheduleDecision{} scheduler := s.getOpenAIAccountScheduler() if scheduler == nil { selection, err := s.SelectAccountWithLoadAwareness(ctx, groupID, sessionHash, requestedModel, excludedIDs) decision.Layer = openAIAccountScheduleLayerLoadBalance return selection, decision, err } var stickyAccountID int64 if sessionHash != "" && s.cache != nil { if accountID, err := s.getStickySessionAccountID(ctx, groupID, sessionHash); err == nil && accountID > 0 { stickyAccountID = accountID } } return scheduler.Select(ctx, OpenAIAccountScheduleRequest{ GroupID: groupID, SessionHash: sessionHash, StickyAccountID: stickyAccountID, PreviousResponseID: previousResponseID, RequestedModel: requestedModel, RequiredTransport: requiredTransport, ExcludedIDs: excludedIDs, }) } func (s *OpenAIGatewayService) ReportOpenAIAccountScheduleResult(accountID int64, success bool, firstTokenMs *int) { scheduler := s.getOpenAIAccountScheduler() if scheduler == nil { return } scheduler.ReportResult(accountID, success, firstTokenMs) } func (s *OpenAIGatewayService) RecordOpenAIAccountSwitch() { scheduler := s.getOpenAIAccountScheduler() if scheduler == nil { return } scheduler.ReportSwitch() } func (s *OpenAIGatewayService) SnapshotOpenAIAccountSchedulerMetrics() OpenAIAccountSchedulerMetricsSnapshot { scheduler := s.getOpenAIAccountScheduler() if scheduler == nil { return OpenAIAccountSchedulerMetricsSnapshot{} } return scheduler.SnapshotMetrics() } func (s *OpenAIGatewayService) openAIWSSessionStickyTTL() time.Duration { if s != nil && s.cfg != nil && s.cfg.Gateway.OpenAIWS.StickySessionTTLSeconds > 0 { return time.Duration(s.cfg.Gateway.OpenAIWS.StickySessionTTLSeconds) * time.Second } return openaiStickySessionTTL } func (s *OpenAIGatewayService) openAIWSLBTopK() int { if s != nil && s.cfg != nil && s.cfg.Gateway.OpenAIWS.LBTopK > 0 { return s.cfg.Gateway.OpenAIWS.LBTopK } return 7 } func (s *OpenAIGatewayService) openAIWSSchedulerWeights() GatewayOpenAIWSSchedulerScoreWeightsView { if s != nil && s.cfg != nil { return GatewayOpenAIWSSchedulerScoreWeightsView{ Priority: s.cfg.Gateway.OpenAIWS.SchedulerScoreWeights.Priority, Load: s.cfg.Gateway.OpenAIWS.SchedulerScoreWeights.Load, Queue: s.cfg.Gateway.OpenAIWS.SchedulerScoreWeights.Queue, ErrorRate: s.cfg.Gateway.OpenAIWS.SchedulerScoreWeights.ErrorRate, TTFT: s.cfg.Gateway.OpenAIWS.SchedulerScoreWeights.TTFT, } } return GatewayOpenAIWSSchedulerScoreWeightsView{ Priority: 1.0, Load: 1.0, Queue: 0.7, ErrorRate: 0.8, TTFT: 0.5, } } type GatewayOpenAIWSSchedulerScoreWeightsView struct { Priority float64 Load float64 Queue float64 ErrorRate float64 TTFT float64 } func clamp01(value float64) float64 { switch { case value < 0: return 0 case value > 1: return 1 default: return value } } func calcLoadSkewByMoments(sum float64, sumSquares float64, count int) float64 { if count <= 1 { return 0 } mean := sum / float64(count) variance := sumSquares/float64(count) - mean*mean if variance < 0 { variance = 0 } return math.Sqrt(variance) }