feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容 - 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行 - 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试 - 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-22 12:56:57 +08:00
parent 50b9897182
commit 33db7a0fb6
15 changed files with 1575 additions and 70 deletions
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -26,6 +26,7 @@ type OpenAIGatewayHandler struct {
 	gatewayService          *service.OpenAIGatewayService
 	billingCacheService     *service.BillingCacheService
 	apiKeyService           *service.APIKeyService
+	usageRecordWorkerPool   *service.UsageRecordWorkerPool
 	errorPassthroughService *service.ErrorPassthroughService
 	concurrencyHelper       *ConcurrencyHelper
 	maxAccountSwitches      int
@@ -37,6 +38,7 @@ func NewOpenAIGatewayHandler(
 	concurrencyService *service.ConcurrencyService,
 	billingCacheService *service.BillingCacheService,
 	apiKeyService *service.APIKeyService,
+	usageRecordWorkerPool *service.UsageRecordWorkerPool,
 	errorPassthroughService *service.ErrorPassthroughService,
 	cfg *config.Config,
 ) *OpenAIGatewayHandler {
@@ -52,6 +54,7 @@ func NewOpenAIGatewayHandler(
 		gatewayService:          gatewayService,
 		billingCacheService:     billingCacheService,
 		apiKeyService:           apiKeyService,
+		usageRecordWorkerPool:   usageRecordWorkerPool,
 		errorPassthroughService: errorPassthroughService,
 		concurrencyHelper:       NewConcurrencyHelper(concurrencyService, SSEPingFormatComment, pingInterval),
 		maxAccountSwitches:      maxAccountSwitches,
@@ -378,18 +381,16 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 		userAgent := c.GetHeader("User-Agent")
 		clientIP := ip.GetClientIP(c)

-		// Async record usage
-		go func(result *service.OpenAIForwardResult, usedAccount *service.Account, ua, ip string) {
-			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-			defer cancel()
+		// 使用量记录通过有界 worker 池提交，避免请求热路径创建无界 goroutine。
+		h.submitUsageRecordTask(func(ctx context.Context) {
 			if err := h.gatewayService.RecordUsage(ctx, &service.OpenAIRecordUsageInput{
 				Result:        result,
 				APIKey:        apiKey,
 				User:          apiKey.User,
-				Account:       usedAccount,
+				Account:       account,
 				Subscription:  subscription,
-				UserAgent:     ua,
-				IPAddress:     ip,
+				UserAgent:     userAgent,
+				IPAddress:     clientIP,
 				APIKeyService: h.apiKeyService,
 			}); err != nil {
 				logger.L().With(
@@ -398,10 +399,10 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 					zap.Int64("api_key_id", apiKey.ID),
 					zap.Any("group_id", apiKey.GroupID),
 					zap.String("model", reqModel),
-					zap.Int64("account_id", usedAccount.ID),
+					zap.Int64("account_id", account.ID),
 				).Error("openai.record_usage_failed", zap.Error(err))
 			}
-		}(result, account, userAgent, clientIP)
+		})
 		reqLog.Debug("openai.request_completed",
 			zap.Int64("account_id", account.ID),
 			zap.Int("switch_count", switchCount),
@@ -432,6 +433,20 @@ func getContextInt64(c *gin.Context, key string) (int64, bool) {
 	}
 }

+func (h *OpenAIGatewayHandler) submitUsageRecordTask(task service.UsageRecordTask) {
+	if task == nil {
+		return
+	}
+	if h.usageRecordWorkerPool != nil {
+		h.usageRecordWorkerPool.Submit(task)
+		return
+	}
+	// 回退路径：worker 池未注入时同步执行，避免退回到无界 goroutine 模式。
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	task(ctx)
+}
+
 // handleConcurrencyError handles concurrency-related errors with proper 429 response
 func (h *OpenAIGatewayHandler) handleConcurrencyError(c *gin.Context, err error, slotType string, streamStarted bool) {
 	h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error",