diff --git a/backend/internal/handler/openai_gateway_handler.go b/backend/internal/handler/openai_gateway_handler.go index c6b969bc..9931052d 100644 --- a/backend/internal/handler/openai_gateway_handler.go +++ b/backend/internal/handler/openai_gateway_handler.go @@ -174,9 +174,11 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) { log.Printf("Account wait queue full: account=%d", account.ID) h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted) return - } - accountWaitRelease = func() { - h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID) + } else { + // Only set release function if increment succeeded + accountWaitRelease = func() { + h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID) + } } accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(