feat(gateway): 双模式用户消息队列 — 串行队列 + 软性限速

新增 UMQ (User Message Queue) 双模式支持:
- serialize: 账号级分布式串行锁 + RPM 自适应延迟(严格限流)
- throttle: 仅 RPM 自适应前置延迟,不阻塞并发(软性限速)

后端:
- config: 新增 Mode 字段,保留 Enabled 向后兼容
- service: 新增 UserMessageQueueService(Lua 锁/延迟算法/清理 worker)
- repository: 新增 UserMsgQueueCache(Redis Lua acquire/release/force-release)
- handler: 新增 UserMsgQueueHelper(SSE ping + 等待循环 + throttle)
- gateway: 按 mode 分支集成 serialize/throttle 逻辑
- lint: 修复 gofmt rewrite rules、errcheck 类型断言、staticcheck QF1012

前端:
- 三态选择器 UI(关闭/软性限速/串行队列)替代 toggle 开关
- BulkEdit 支持 null 语义(不修改)
- i18n 中英文文案

通过 6 轮专家评审(42 次 review)、golangci-lint、单元测试、集成测试。
This commit is contained in:
QTom
2026-03-03 01:02:39 +08:00
parent 7abec1888f
commit a9285b8a94
21 changed files with 1099 additions and 15 deletions

View File

@@ -686,6 +686,27 @@
/>
<p class="input-hint">{{ t('admin.accounts.quotaControl.rpmLimit.stickyBufferHint') }}</p>
</div>
</div>
</div>
<!--
User message queue (UMQ) mode selector. It is independent of the RPM toggle
and therefore always visible. One button is rendered per entry in
umqModeOptions; clicking the currently selected button deselects it by
setting userMsgQueueMode back to null.
-->
<div class="mt-4">
<label class="input-label">{{ t('admin.accounts.quotaControl.rpmLimit.userMsgQueue') }}</label>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400 mb-2">
{{ t('admin.accounts.quotaControl.rpmLimit.userMsgQueueHint') }}
</p>
<div class="flex space-x-2">
<button type="button" v-for="opt in umqModeOptions" :key="opt.value"
@click="userMsgQueueMode = userMsgQueueMode === opt.value ? null : opt.value"
:class="[
'px-3 py-1.5 text-sm rounded-md border transition-colors',
userMsgQueueMode === opt.value
? 'bg-primary-600 text-white border-primary-600'
: 'bg-white dark:bg-dark-700 text-gray-700 dark:text-gray-300 border-gray-300 dark:border-dark-500 hover:bg-gray-50 dark:hover:bg-dark-600'
]">
{{ opt.label }}
</button>
</div>
</div>
</div>
@@ -876,6 +897,12 @@ const rpmLimitEnabled = ref(false)
const bulkBaseRpm = ref<number | null>(null)
const bulkRpmStrategy = ref<'tiered' | 'sticky_exempt'>('tiered')
const bulkRpmStickyBuffer = ref<number | null>(null)
// Currently selected user message queue (UMQ) mode.
// null means no option is selected; '' is the explicit "off" choice.
const userMsgQueueMode = ref<string | null>(null)

// Choices offered by the UMQ mode selector, in display order:
// off (''), throttle, serialize. Labels are resolved through i18n inside
// computed() — presumably so they follow locale changes (confirm).
const umqModeOptions = computed(() => {
  const offLabel = t('admin.accounts.quotaControl.rpmLimit.umqModeOff')
  const throttleLabel = t('admin.accounts.quotaControl.rpmLimit.umqModeThrottle')
  const serializeLabel = t('admin.accounts.quotaControl.rpmLimit.umqModeSerialize')

  return [
    { value: '', label: offLabel },
    { value: 'throttle', label: throttleLabel },
    { value: 'serialize', label: serializeLabel },
  ]
})
// All models list (combined Anthropic + OpenAI + Gemini)
const allModels = [
@@ -1249,6 +1276,14 @@ const buildUpdatePayload = (): Record<string, unknown> | null => {
updates.extra = extra
}
// UMQ mode (saved independently of the RPM settings)
if (userMsgQueueMode.value !== null) {
if (!updates.extra) updates.extra = {}
const umqExtra = updates.extra as Record<string, unknown>
umqExtra.user_msg_queue_mode = userMsgQueueMode.value // '' = 清除账号级覆盖
umqExtra.user_msg_queue_enabled = false // 清理旧字段JSONB merge
}
return Object.keys(updates).length > 0 ? updates : null
}
@@ -1309,7 +1344,8 @@ const handleSubmit = async () => {
enableRateMultiplier.value ||
enableStatus.value ||
enableGroups.value ||
enableRpmLimit.value
enableRpmLimit.value ||
userMsgQueueMode.value !== null
if (!hasAnyFieldEnabled) {
appStore.showError(t('admin.accounts.bulkEdit.noFieldsSelected'))
@@ -1414,6 +1450,11 @@ watch(
rateMultiplier.value = 1
status.value = 'active'
groupIds.value = []
rpmLimitEnabled.value = false
bulkBaseRpm.value = null
bulkRpmStrategy.value = 'tiered'
bulkRpmStickyBuffer.value = null
userMsgQueueMode.value = null
// Reset mixed channel warning state
showMixedChannelWarning.value = false