fix(流式): 提升SSE稳定性并统一超时配置
- 扩展SSE行长与间隔超时处理,补充keepalive - 写入失败与超长行时发送错误事件,修复并发释放 - 同步默认配置与示例配置,更新Caddy超时/压缩规则 - 新增OpenAI流式超时与超长行测试 测试: go test ./...
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/Wei-Shaw/sub2api/internal/service"
|
||||
@@ -26,8 +27,8 @@ import (
|
||||
const (
|
||||
// maxConcurrencyWait 等待并发槽位的最大时间
|
||||
maxConcurrencyWait = 30 * time.Second
|
||||
// pingInterval 流式响应等待时发送 ping 的间隔
|
||||
pingInterval = 15 * time.Second
|
||||
// defaultPingInterval 流式响应等待时发送 ping 的默认间隔
|
||||
defaultPingInterval = 10 * time.Second
|
||||
// initialBackoff 初始退避时间
|
||||
initialBackoff = 100 * time.Millisecond
|
||||
// backoffMultiplier 退避时间乘数(指数退避)
|
||||
@@ -44,6 +45,8 @@ const (
|
||||
SSEPingFormatClaude SSEPingFormat = "data: {\"type\": \"ping\"}\n\n"
|
||||
// SSEPingFormatNone indicates no ping should be sent (e.g., OpenAI has no ping spec)
|
||||
SSEPingFormatNone SSEPingFormat = ""
|
||||
// SSEPingFormatComment is an SSE comment ping for OpenAI/Codex CLI clients
|
||||
SSEPingFormatComment SSEPingFormat = ":\n\n"
|
||||
)
|
||||
|
||||
// ConcurrencyError represents a concurrency limit error with context
|
||||
@@ -63,16 +66,38 @@ func (e *ConcurrencyError) Error() string {
|
||||
type ConcurrencyHelper struct {
|
||||
concurrencyService *service.ConcurrencyService
|
||||
pingFormat SSEPingFormat
|
||||
pingInterval time.Duration
|
||||
}
|
||||
|
||||
// NewConcurrencyHelper creates a new ConcurrencyHelper
|
||||
func NewConcurrencyHelper(concurrencyService *service.ConcurrencyService, pingFormat SSEPingFormat) *ConcurrencyHelper {
|
||||
func NewConcurrencyHelper(concurrencyService *service.ConcurrencyService, pingFormat SSEPingFormat, pingInterval time.Duration) *ConcurrencyHelper {
|
||||
if pingInterval <= 0 {
|
||||
pingInterval = defaultPingInterval
|
||||
}
|
||||
return &ConcurrencyHelper{
|
||||
concurrencyService: concurrencyService,
|
||||
pingFormat: pingFormat,
|
||||
pingInterval: pingInterval,
|
||||
}
|
||||
}
|
||||
|
||||
// wrapReleaseOnDone ensures release runs at most once and still triggers on context cancellation.
|
||||
// 用于避免客户端断开或上游超时导致的并发槽位泄漏。
|
||||
func wrapReleaseOnDone(ctx context.Context, releaseFunc func()) func() {
|
||||
if releaseFunc == nil {
|
||||
return nil
|
||||
}
|
||||
var once sync.Once
|
||||
wrapped := func() {
|
||||
once.Do(releaseFunc)
|
||||
}
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
wrapped()
|
||||
}()
|
||||
return wrapped
|
||||
}
|
||||
|
||||
// IncrementWaitCount increments the wait count for a user
|
||||
func (h *ConcurrencyHelper) IncrementWaitCount(ctx context.Context, userID int64, maxWait int) (bool, error) {
|
||||
return h.concurrencyService.IncrementWaitCount(ctx, userID, maxWait)
|
||||
@@ -174,7 +199,7 @@ func (h *ConcurrencyHelper) waitForSlotWithPingTimeout(c *gin.Context, slotType
|
||||
// Only create ping ticker if ping is needed
|
||||
var pingCh <-chan time.Time
|
||||
if needPing {
|
||||
pingTicker := time.NewTicker(pingInterval)
|
||||
pingTicker := time.NewTicker(h.pingInterval)
|
||||
defer pingTicker.Stop()
|
||||
pingCh = pingTicker.C
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user