feat(gateway): 双模式用户消息队列 — 串行队列 + 软性限速

新增 UMQ (User Message Queue) 双模式支持:
- serialize: 账号级分布式串行锁 + RPM 自适应延迟(严格限流)
- throttle: 仅 RPM 自适应前置延迟,不阻塞并发(软性限速)

后端:
- config: 新增 Mode 字段,保留 Enabled 向后兼容
- service: 新增 UserMessageQueueService(Lua 锁/延迟算法/清理 worker)
- repository: 新增 UserMsgQueueCache(Redis Lua acquire/release/force-release)
- handler: 新增 UserMsgQueueHelper(SSE ping + 等待循环 + throttle)
- gateway: 按 mode 分支集成 serialize/throttle 逻辑
- lint: 修复 gofmt rewrite rules、errcheck 类型断言、staticcheck QF1012

前端:
- 三态选择器 UI(关闭/软性限速/串行队列)替代 toggle 开关
- BulkEdit 支持 null 语义(不修改)
- i18n 中英文文案

通过 6 轮专家评审(42 次 review)、golangci-lint、单元测试、集成测试。
This commit is contained in:
QTom
2026-03-03 01:02:39 +08:00
parent 7abec1888f
commit a9285b8a94
21 changed files with 1099 additions and 15 deletions

View File

@@ -686,6 +686,27 @@
/>
<p class="input-hint">{{ t('admin.accounts.quotaControl.rpmLimit.stickyBufferHint') }}</p>
</div>
</div>
</div>
<!--
  User-message rate-limit mode selector (independent of the RPM toggle, always visible).
  Bulk edit is tri-state: clicking the already-selected option resets the value to null.
-->
<div class="mt-4">
  <label class="input-label">{{ t('admin.accounts.quotaControl.rpmLimit.userMsgQueue') }}</label>
  <p class="mt-1 text-xs text-gray-500 dark:text-gray-400 mb-2">
    {{ t('admin.accounts.quotaControl.rpmLimit.userMsgQueueHint') }}
  </p>
  <!-- role/aria-label give the button group an accessible name; the <label> above is not bound to a control -->
  <div class="flex space-x-2" role="group" :aria-label="t('admin.accounts.quotaControl.rpmLimit.userMsgQueue')">
    <!-- aria-pressed exposes the selected state, which is otherwise conveyed by colour only -->
    <button type="button" v-for="opt in umqModeOptions" :key="opt.value"
      :aria-pressed="userMsgQueueMode === opt.value ? 'true' : 'false'"
      @click="userMsgQueueMode = userMsgQueueMode === opt.value ? null : opt.value"
      :class="[
        'px-3 py-1.5 text-sm rounded-md border transition-colors',
        userMsgQueueMode === opt.value
          ? 'bg-primary-600 text-white border-primary-600'
          : 'bg-white dark:bg-dark-700 text-gray-700 dark:text-gray-300 border-gray-300 dark:border-dark-500 hover:bg-gray-50 dark:hover:bg-dark-600'
      ]">
      {{ opt.label }}
    </button>
  </div>
</div>
</div>
@@ -876,6 +897,12 @@ const rpmLimitEnabled = ref(false)
const bulkBaseRpm = ref<number | null>(null)
const bulkRpmStrategy = ref<'tiered' | 'sticky_exempt'>('tiered')
const bulkRpmStickyBuffer = ref<number | null>(null)
// Bulk-edit UMQ mode. null is the initial state; the other values are the
// option values listed below ('' | 'throttle' | 'serialize').
const userMsgQueueMode = ref<string | null>(null)
/**
 * Options rendered by the UMQ mode selector, in display order:
 * off (empty string), throttle, serialize.
 * Labels are resolved through t() inside the computed getter.
 */
const umqModeOptions = computed(() => {
  const offOption = { value: '', label: t('admin.accounts.quotaControl.rpmLimit.umqModeOff') }
  const throttleOption = { value: 'throttle', label: t('admin.accounts.quotaControl.rpmLimit.umqModeThrottle') }
  const serializeOption = { value: 'serialize', label: t('admin.accounts.quotaControl.rpmLimit.umqModeSerialize') }
  return [offOption, throttleOption, serializeOption]
})
// All models list (combined Anthropic + OpenAI + Gemini)
const allModels = [
@@ -1249,6 +1276,14 @@ const buildUpdatePayload = (): Record<string, unknown> | null => {
updates.extra = extra
}
// UMQ mode (saved independently of the RPM settings).
// null means no option is selected, so nothing is written to the payload.
if (userMsgQueueMode.value !== null) {
// Create the extra object when no earlier step has set one.
if (!updates.extra) updates.extra = {}
const umqExtra = updates.extra as Record<string, unknown>
umqExtra.user_msg_queue_mode = userMsgQueueMode.value // '' = clear the account-level override
// Reset the legacy field (JSONB merge). NOTE(review): presumably the backend merges
// `extra` key by key, so the old flag must be overwritten rather than omitted — confirm.
umqExtra.user_msg_queue_enabled = false
}
return Object.keys(updates).length > 0 ? updates : null
}
@@ -1309,7 +1344,8 @@ const handleSubmit = async () => {
enableRateMultiplier.value ||
enableStatus.value ||
enableGroups.value ||
enableRpmLimit.value
enableRpmLimit.value ||
userMsgQueueMode.value !== null
if (!hasAnyFieldEnabled) {
appStore.showError(t('admin.accounts.bulkEdit.noFieldsSelected'))
@@ -1414,6 +1450,11 @@ watch(
rateMultiplier.value = 1
status.value = 'active'
groupIds.value = []
rpmLimitEnabled.value = false
bulkBaseRpm.value = null
bulkRpmStrategy.value = 'tiered'
bulkRpmStickyBuffer.value = null
userMsgQueueMode.value = null
// Reset mixed channel warning state
showMixedChannelWarning.value = false

View File

@@ -1625,6 +1625,27 @@
/>
<p class="input-hint">{{ t('admin.accounts.quotaControl.rpmLimit.stickyBufferHint') }}</p>
</div>
</div>
<!-- User-message rate-limit mode selector (independent of the RPM toggle, always visible). -->
<div class="mt-4">
  <label class="input-label">{{ t('admin.accounts.quotaControl.rpmLimit.userMsgQueue') }}</label>
  <p class="mt-1 text-xs text-gray-500 dark:text-gray-400 mb-2">
    {{ t('admin.accounts.quotaControl.rpmLimit.userMsgQueueHint') }}
  </p>
  <!-- role/aria-label give the button group an accessible name; the <label> above is not bound to a control -->
  <div class="flex space-x-2" role="group" :aria-label="t('admin.accounts.quotaControl.rpmLimit.userMsgQueue')">
    <!-- aria-pressed exposes the selected state, which is otherwise conveyed by colour only -->
    <button type="button" v-for="opt in umqModeOptions" :key="opt.value"
      :aria-pressed="userMsgQueueMode === opt.value ? 'true' : 'false'"
      @click="userMsgQueueMode = opt.value"
      :class="[
        'px-3 py-1.5 text-sm rounded-md border transition-colors',
        userMsgQueueMode === opt.value
          ? 'bg-primary-600 text-white border-primary-600'
          : 'bg-white dark:bg-dark-700 text-gray-700 dark:text-gray-300 border-gray-300 dark:border-dark-500 hover:bg-gray-50 dark:hover:bg-dark-600'
      ]">
      {{ opt.label }}
    </button>
  </div>
</div>
</div>
@@ -2489,6 +2510,12 @@ const rpmLimitEnabled = ref(false)
const baseRpm = ref<number | null>(null)
const rpmStrategy = ref<'tiered' | 'sticky_exempt'>('tiered')
const rpmStickyBuffer = ref<number | null>(null)
// Selected UMQ mode for the create form; the empty string is the "off" option.
const userMsgQueueMode = ref('')
/**
 * Options rendered by the UMQ mode selector, in display order:
 * off (empty string), throttle, serialize.
 * Labels are resolved through t() inside the computed getter.
 */
const umqModeOptions = computed(() => {
  const offOption = { value: '', label: t('admin.accounts.quotaControl.rpmLimit.umqModeOff') }
  const throttleOption = { value: 'throttle', label: t('admin.accounts.quotaControl.rpmLimit.umqModeThrottle') }
  const serializeOption = { value: 'serialize', label: t('admin.accounts.quotaControl.rpmLimit.umqModeSerialize') }
  return [offOption, throttleOption, serializeOption]
})
const tlsFingerprintEnabled = ref(false)
const sessionIdMaskingEnabled = ref(false)
const cacheTTLOverrideEnabled = ref(false)
@@ -3117,6 +3144,7 @@ const resetForm = () => {
baseRpm.value = null
rpmStrategy.value = 'tiered'
rpmStickyBuffer.value = null
userMsgQueueMode.value = ''
tlsFingerprintEnabled.value = false
sessionIdMaskingEnabled.value = false
cacheTTLOverrideEnabled.value = false
@@ -4035,6 +4063,11 @@ const handleAnthropicExchange = async (authCode: string) => {
}
}
// UMQ mode (independent of RPM).
// '' (off) is falsy, so the key is only written when a mode is selected.
if (userMsgQueueMode.value) {
extra.user_msg_queue_mode = userMsgQueueMode.value
}
// Add TLS fingerprint settings
if (tlsFingerprintEnabled.value) {
extra.enable_tls_fingerprint = true
@@ -4142,6 +4175,11 @@ const handleCookieAuth = async (sessionKey: string) => {
}
}
// UMQ mode (independent of RPM).
// '' (off) is falsy, so the key is only written when a mode is selected.
if (userMsgQueueMode.value) {
extra.user_msg_queue_mode = userMsgQueueMode.value
}
// Add TLS fingerprint settings
if (tlsFingerprintEnabled.value) {
extra.enable_tls_fingerprint = true

View File

@@ -1035,6 +1035,27 @@
/>
<p class="input-hint">{{ t('admin.accounts.quotaControl.rpmLimit.stickyBufferHint') }}</p>
</div>
</div>
<!-- User-message rate-limit mode selector (independent of the RPM toggle, always visible). -->
<div class="mt-4">
  <label class="input-label">{{ t('admin.accounts.quotaControl.rpmLimit.userMsgQueue') }}</label>
  <p class="mt-1 text-xs text-gray-500 dark:text-gray-400 mb-2">
    {{ t('admin.accounts.quotaControl.rpmLimit.userMsgQueueHint') }}
  </p>
  <!-- role/aria-label give the button group an accessible name; the <label> above is not bound to a control -->
  <div class="flex space-x-2" role="group" :aria-label="t('admin.accounts.quotaControl.rpmLimit.userMsgQueue')">
    <!-- aria-pressed exposes the selected state, which is otherwise conveyed by colour only -->
    <button type="button" v-for="opt in umqModeOptions" :key="opt.value"
      :aria-pressed="userMsgQueueMode === opt.value ? 'true' : 'false'"
      @click="userMsgQueueMode = opt.value"
      :class="[
        'px-3 py-1.5 text-sm rounded-md border transition-colors',
        userMsgQueueMode === opt.value
          ? 'bg-primary-600 text-white border-primary-600'
          : 'bg-white dark:bg-dark-700 text-gray-700 dark:text-gray-300 border-gray-300 dark:border-dark-500 hover:bg-gray-50 dark:hover:bg-dark-600'
      ]">
      {{ opt.label }}
    </button>
  </div>
</div>
</div>
@@ -1347,6 +1368,12 @@ const rpmLimitEnabled = ref(false)
const baseRpm = ref<number | null>(null)
const rpmStrategy = ref<'tiered' | 'sticky_exempt'>('tiered')
const rpmStickyBuffer = ref<number | null>(null)
// Selected UMQ mode for the edit form; the empty string is the "off" option.
const userMsgQueueMode = ref('')
/**
 * Options rendered by the UMQ mode selector, in display order:
 * off (empty string), throttle, serialize.
 * Labels are resolved through t() inside the computed getter.
 */
const umqModeOptions = computed(() => {
  const offOption = { value: '', label: t('admin.accounts.quotaControl.rpmLimit.umqModeOff') }
  const throttleOption = { value: 'throttle', label: t('admin.accounts.quotaControl.rpmLimit.umqModeThrottle') }
  const serializeOption = { value: 'serialize', label: t('admin.accounts.quotaControl.rpmLimit.umqModeSerialize') }
  return [offOption, throttleOption, serializeOption]
})
const tlsFingerprintEnabled = ref(false)
const sessionIdMaskingEnabled = ref(false)
const cacheTTLOverrideEnabled = ref(false)
@@ -1810,6 +1837,7 @@ function loadQuotaControlSettings(account: Account) {
baseRpm.value = null
rpmStrategy.value = 'tiered'
rpmStickyBuffer.value = null
userMsgQueueMode.value = ''
tlsFingerprintEnabled.value = false
sessionIdMaskingEnabled.value = false
cacheTTLOverrideEnabled.value = false
@@ -1841,6 +1869,9 @@ function loadQuotaControlSettings(account: Account) {
rpmStickyBuffer.value = account.rpm_sticky_buffer ?? null
}
// UMQ mode (loaded independently of RPM, so editing an account without RPM
// settings does not lose an existing UMQ configuration).
// A missing or null mode falls back to '' (the "off" option).
// NOTE(review): only user_msg_queue_mode is read here; an account that still carries
// just the legacy user_msg_queue_enabled flag would presumably show as "off" — confirm.
userMsgQueueMode.value = account.user_msg_queue_mode ?? ''
// Load TLS fingerprint setting
if (account.enable_tls_fingerprint === true) {
tlsFingerprintEnabled.value = true
@@ -2166,6 +2197,14 @@ const handleSubmit = async () => {
delete newExtra.rpm_sticky_buffer
}
// UMQ mode (saved independently of the RPM settings).
if (userMsgQueueMode.value) {
newExtra.user_msg_queue_mode = userMsgQueueMode.value
} else {
// '' (off) is falsy: drop the key instead of storing an empty mode.
delete newExtra.user_msg_queue_mode
}
delete newExtra.user_msg_queue_enabled // remove the legacy field
// TLS fingerprint setting
if (tlsFingerprintEnabled.value) {
newExtra.enable_tls_fingerprint = true

View File

@@ -1864,7 +1864,12 @@ export default {
strategyHint: 'Tiered: gradually restrict when exceeded; Sticky Exempt: existing sessions unrestricted',
stickyBuffer: 'Sticky Buffer',
stickyBufferPlaceholder: 'Default: 20% of base RPM',
stickyBufferHint: 'Extra requests allowed for sticky sessions after exceeding base RPM. Leave empty to use default (20% of base RPM, min 1)'
stickyBufferHint: 'Extra requests allowed for sticky sessions after exceeding base RPM. Leave empty to use default (20% of base RPM, min 1)',
userMsgQueue: 'User Message Rate Control',
userMsgQueueHint: 'Rate-limit user messages to avoid triggering upstream RPM limits',
umqModeOff: 'Off',
umqModeThrottle: 'Throttle',
umqModeSerialize: 'Serialize',
},
tlsFingerprint: {
label: 'TLS Fingerprint Simulation',

View File

@@ -2007,7 +2007,12 @@ export default {
strategyHint: '三区模型: 超限后逐步限制; 粘性豁免: 已有会话不受限',
stickyBuffer: '粘性缓冲区',
stickyBufferPlaceholder: '默认: base RPM 的 20%',
stickyBufferHint: '超过 base RPM 后粘性会话额外允许的请求数。为空则使用默认值base RPM 的 20%,最小为 1'
stickyBufferHint: '超过 base RPM 后粘性会话额外允许的请求数。为空则使用默认值base RPM 的 20%,最小为 1',
userMsgQueue: '用户消息限速',
userMsgQueueHint: '对用户消息施加发送限制,避免触发上游 RPM 限制',
umqModeOff: '关闭',
umqModeThrottle: '软性限速',
umqModeSerialize: '串行队列',
},
tlsFingerprint: {
label: 'TLS 指纹模拟',

View File

@@ -665,6 +665,7 @@ export interface Account {
base_rpm?: number | null
rpm_strategy?: string | null
rpm_sticky_buffer?: number | null
// NOTE(review): the bulk-edit form also writes '' for this key to clear the
// account-level override — confirm whether the API can return '' here.
user_msg_queue_mode?: string | null // "serialize" | "throttle" | null
// TLS fingerprint spoofing (only effective for Anthropic OAuth/SetupToken accounts)
enable_tls_fingerprint?: boolean | null