perf(gateway): 优化负载感知调度

主要改进:
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性
This commit is contained in:
ianshaw
2026-01-03 06:32:51 -08:00
parent 26106eb0ac
commit acb718d355
7 changed files with 369 additions and 310 deletions

View File

@@ -75,9 +75,19 @@ func ParseGatewayRequest(body []byte) (*ParsedRequest, error) {
// FilterThinkingBlocks removes thinking blocks from request body
// Returns filtered body or original body if filtering fails (fail-safe)
// This prevents 400 errors from invalid thinking block signatures
//
// Strategy:
// - When thinking.type != "enabled": Remove all thinking blocks
// - When thinking.type == "enabled": Only remove thinking blocks without valid signatures
// (blocks with missing/empty/dummy signatures that would cause 400 errors)
func FilterThinkingBlocks(body []byte) []byte {
// Fast path: if body doesn't contain "thinking", skip parsing
if !bytes.Contains(body, []byte("thinking")) {
if !bytes.Contains(body, []byte(`"type":"thinking"`)) &&
!bytes.Contains(body, []byte(`"type": "thinking"`)) &&
!bytes.Contains(body, []byte(`"type":"redacted_thinking"`)) &&
!bytes.Contains(body, []byte(`"type": "redacted_thinking"`)) &&
!bytes.Contains(body, []byte(`"thinking":`)) &&
!bytes.Contains(body, []byte(`"thinking" :`)) {
return body
}
@@ -86,6 +96,14 @@ func FilterThinkingBlocks(body []byte) []byte {
return body // Return original on parse error
}
// Check if thinking is enabled
thinkingEnabled := false
if thinking, ok := req["thinking"].(map[string]any); ok {
if thinkType, ok := thinking["type"].(string); ok && thinkType == "enabled" {
thinkingEnabled = true
}
}
messages, ok := req["messages"].([]any)
if !ok {
return body // No messages array
@@ -98,6 +116,7 @@ func FilterThinkingBlocks(body []byte) []byte {
continue
}
role, _ := msgMap["role"].(string)
content, ok := msgMap["content"].([]any)
if !ok {
continue
@@ -106,6 +125,7 @@ func FilterThinkingBlocks(body []byte) []byte {
// Filter thinking blocks from content array
newContent := make([]any, 0, len(content))
filteredThisMessage := false
for _, block := range content {
blockMap, ok := block.(map[string]any)
if !ok {
@@ -114,22 +134,34 @@ func FilterThinkingBlocks(body []byte) []byte {
}
blockType, _ := blockMap["type"].(string)
// Explicit Anthropic-style thinking block: {"type":"thinking", ...}
if blockType == "thinking" {
// Handle thinking/redacted_thinking blocks
if blockType == "thinking" || blockType == "redacted_thinking" {
// When thinking is enabled and this is an assistant message,
// only keep thinking blocks with valid (non-empty, non-dummy) signatures
if thinkingEnabled && role == "assistant" {
signature, _ := blockMap["signature"].(string)
// Keep blocks with valid signatures, remove those without
if signature != "" && signature != "skip_thought_signature_validator" {
newContent = append(newContent, block)
continue
}
}
filtered = true
filteredThisMessage = true
continue // Skip thinking blocks
continue
}
// Some clients send the "thinking" object without a "type" discriminator.
// Vertex/Claude still expects a signature for any thinking block, so we drop it.
// We intentionally do not drop other typed blocks (e.g. tool_use) that might
// legitimately contain a "thinking" key inside their payload.
if blockType == "" {
if _, hasThinking := blockMap["thinking"]; hasThinking {
if thinkingContent, hasThinking := blockMap["thinking"]; hasThinking {
_ = thinkingContent
filtered = true
filteredThisMessage = true
continue // Skip thinking blocks
continue
}
}