feat(sync): full code sync from release

2026-02-28 15:01:20 +08:00
parent bfc7b339f7
commit bb664d9bbf
338 changed files with 54513 additions and 2011 deletions
--- a/tools/perf/openai_ws_v2_perf_suite_k6.js
+++ b/tools/perf/openai_ws_v2_perf_suite_k6.js
@@ -0,0 +1,216 @@
+import http from 'k6/http';
+import { check, sleep } from 'k6';
+import { Rate, Trend } from 'k6/metrics';
+
+/**
+ * Reads a numeric setting from the k6 environment.
+ *
+ * An unset or empty variable yields `fallback`. Any other value is converted
+ * with `Number()`; a result that is not finite (for example `SHORT_RPS=abc`)
+ * aborts the init stage with a message naming the variable, instead of
+ * handing `NaN` to the scenario configuration.
+ *
+ * @param {string} name - Environment variable name.
+ * @param {number} fallback - Value used when the variable is unset or empty.
+ * @returns {number} The parsed setting.
+ * @throws {Error} When the value does not convert to a finite number.
+ */
+function envNumber(name, fallback) {
+  const raw = __ENV[name] || fallback;
+  const value = Number(raw);
+  if (!Number.isFinite(value)) {
+    throw new Error(`${name} must be a finite number, got "${raw}"`);
+  }
+  return value;
+}
+
+// Base URL of the service under test. All trailing slashes are removed so
+// request paths can be appended with exactly one "/".
+const baseURL = (__ENV.BASE_URL || 'http://127.0.0.1:5231').replace(/\/+$/, '');
+// API key for the regular scenarios; an empty key means no Authorization header.
+const wsAPIKey = (__ENV.WS_API_KEY || '').trim();
+// API key for the hotspot scenario. Falls back to wsAPIKey when the dedicated
+// variable is unset, empty, or contains only whitespace.
+const wsHotspotAPIKey = (__ENV.WS_HOTSPOT_API_KEY || '').trim() || wsAPIKey;
+const model = __ENV.MODEL || 'gpt-5.3-codex';
+// Scenario duration and per-request timeout, passed through as k6 duration strings.
+const duration = __ENV.DURATION || '5m';
+const timeout = __ENV.TIMEOUT || '180s';
+
+// Arrival rates (iterations per second) for each scenario.
+const shortRPS = envNumber('SHORT_RPS', 12);
+const longRPS = envNumber('LONG_RPS', 4);
+const errorRPS = envNumber('ERROR_RPS', 2);
+const hotspotRPS = envNumber('HOTSPOT_RPS', 10);
+// VU pool bounds used by the short_request scenario.
+const preAllocatedVUs = envNumber('PRE_ALLOCATED_VUS', 50);
+const maxVUs = envNumber('MAX_VUS', 400);
+
+// Custom metrics recorded by this suite. The names are referenced by the
+// thresholds in `options`, so they must stay in sync with that block.
+// The second Trend argument is k6's `isTime` flag.
+const isTimeTrend = true;
+// Total request duration as reported by k6 (`res.timings.duration`).
+const reqDurationMs = new Trend('openai_ws_v2_perf_req_duration_ms', isTimeTrend);
+// Time spent waiting for the first response byte (`res.timings.waiting`).
+const ttftMs = new Trend('openai_ws_v2_perf_ttft_ms', isTimeTrend);
+// Share of requests whose status is outside 200-299.
+const non2xxRate = new Rate('openai_ws_v2_perf_non2xx_rate');
+// Share of requests considered completed by the scenario functions.
+const doneRate = new Rate('openai_ws_v2_perf_done_rate');
+// Share of error-injection requests that returned status >= 400.
+const expectedErrorRate = new Rate('openai_ws_v2_perf_expected_error_rate');
+
+// Scenarios whose requests are expected to succeed. The error_injection
+// scenario is left out on purpose: it deliberately provokes 4xx/5xx responses
+// and only counts a request as done when the body contains "[DONE]".
+const thresholdScenarios = ['short_request', 'long_request', 'hotspot_account'];
+
+/**
+ * Builds one tagged sub-metric threshold per success-path scenario.
+ *
+ * @param {string} metric - Custom metric name.
+ * @param {string[]} conditions - k6 threshold expressions.
+ * @returns {Object<string, string[]>} Map of `metric{scenario:name}` to conditions.
+ */
+function perScenarioThresholds(metric, conditions) {
+  const scoped = {};
+  for (const name of thresholdScenarios) {
+    scoped[`${metric}{scenario:${name}}`] = conditions.slice();
+  }
+  return scoped;
+}
+
+/**
+ * k6 options: four constant-arrival-rate scenarios that run concurrently for
+ * `duration`, each bound to its own exported function.
+ *
+ * The non-2xx and done-rate thresholds are evaluated per success-path
+ * scenario. With the default rates, error_injection is 2 of 28 requests per
+ * second; if its samples were included, the expected errors alone would push
+ * the aggregate past `rate<0.05` / below `rate>0.95`. Its samples are still
+ * recorded on the metrics, they just do not gate the run.
+ * The latency thresholds stay global.
+ */
+export const options = {
+  scenarios: {
+    short_request: {
+      executor: 'constant-arrival-rate',
+      exec: 'runShortRequest',
+      rate: shortRPS,
+      timeUnit: '1s',
+      duration,
+      preAllocatedVUs,
+      maxVUs,
+      tags: { scenario: 'short_request' },
+    },
+    long_request: {
+      executor: 'constant-arrival-rate',
+      exec: 'runLongRequest',
+      rate: longRPS,
+      timeUnit: '1s',
+      duration,
+      // VU pool scales with the rate but never drops below a fixed floor.
+      preAllocatedVUs: Math.max(20, Math.ceil(longRPS * 6)),
+      maxVUs: Math.max(100, Math.ceil(longRPS * 20)),
+      tags: { scenario: 'long_request' },
+    },
+    error_injection: {
+      executor: 'constant-arrival-rate',
+      exec: 'runErrorInjection',
+      rate: errorRPS,
+      timeUnit: '1s',
+      duration,
+      preAllocatedVUs: Math.max(8, Math.ceil(errorRPS * 4)),
+      maxVUs: Math.max(40, Math.ceil(errorRPS * 12)),
+      tags: { scenario: 'error_injection' },
+    },
+    hotspot_account: {
+      executor: 'constant-arrival-rate',
+      exec: 'runHotspotAccount',
+      rate: hotspotRPS,
+      timeUnit: '1s',
+      duration,
+      preAllocatedVUs: Math.max(16, Math.ceil(hotspotRPS * 3)),
+      maxVUs: Math.max(80, Math.ceil(hotspotRPS * 10)),
+      tags: { scenario: 'hotspot_account' },
+    },
+  },
+  thresholds: Object.assign(
+    {},
+    perScenarioThresholds('openai_ws_v2_perf_non2xx_rate', ['rate<0.05']),
+    {
+      openai_ws_v2_perf_req_duration_ms: ['p(95)<5000', 'p(99)<9000'],
+      openai_ws_v2_perf_ttft_ms: ['p(99)<2000'],
+    },
+    perScenarioThresholds('openai_ws_v2_perf_done_rate', ['rate>0.95']),
+  ),
+};
+
+/**
+ * Assembles the HTTP headers for a /v1/responses request.
+ *
+ * The content type, user agent and OpenAI-Beta headers are always present.
+ * Authorization, session_id and conversation_id are added only when the
+ * corresponding value is truthy.
+ *
+ * @param {string} apiKey - Bearer token; falsy means no Authorization header.
+ * @param {{sessionID?: string, conversationID?: string}} [opts] - Optional ids.
+ * @returns {Object<string, string>} Header map.
+ */
+function buildHeaders(apiKey, opts = {}) {
+  const { sessionID, conversationID } = opts;
+  const result = {
+    'Content-Type': 'application/json',
+    'User-Agent': 'codex_cli_rs/0.104.0',
+    'OpenAI-Beta': 'responses_websockets=2026-02-06,responses=experimental',
+  };
+  const optionalEntries = [
+    ['Authorization', apiKey ? `Bearer ${apiKey}` : ''],
+    ['session_id', sessionID],
+    ['conversation_id', conversationID],
+  ];
+  for (const [key, value] of optionalEntries) {
+    if (value) {
+      result[key] = value;
+    }
+  }
+  return result;
+}
+
+/**
+ * Serializes the minimal non-streaming request: a single user message and a
+ * 64-token output cap, using the configured `model`.
+ *
+ * @returns {string} JSON request body.
+ */
+function shortBody() {
+  const userMessage = {
+    role: 'user',
+    content: [{ type: 'input_text', text: '请回复一个词：pong' }],
+  };
+  const payload = {
+    model,
+    stream: false,
+    input: [userMessage],
+    max_output_tokens: 64,
+  };
+  return JSON.stringify(payload);
+}
+
+/**
+ * Serializes the heavy non-streaming request: 28 function-tool definitions
+ * and 20 user messages, with parallel tool calls enabled, medium reasoning
+ * effort and a 256-token output cap.
+ *
+ * @returns {string} JSON request body.
+ */
+function longBody() {
+  // Every tool shares the same schema shape; only the name is indexed.
+  const makeTool = (index) => ({
+    type: 'function',
+    name: `perf_tool_${index}`,
+    description: 'load test tool schema',
+    parameters: {
+      type: 'object',
+      properties: {
+        query: { type: 'string' },
+        limit: { type: 'number' },
+        with_cache: { type: 'boolean' },
+      },
+      required: ['query'],
+    },
+  });
+  const makeMessage = (index) => ({
+    role: 'user',
+    content: [{ type: 'input_text', text: `长请求压测消息 ${index}: 请输出简要摘要。` }],
+  });
+
+  const tools = Array.from({ length: 28 }, (_, index) => makeTool(index));
+  const input = Array.from({ length: 20 }, (_, index) => makeMessage(index));
+
+  const payload = {
+    model,
+    stream: false,
+    input,
+    tools,
+    parallel_tool_calls: true,
+    max_output_tokens: 256,
+    reasoning: { effort: 'medium' },
+    instructions: '你是压测助手，简洁回复。',
+  };
+  return JSON.stringify(payload);
+}
+
+/**
+ * Serializes a request that references a made-up `previous_response_id`.
+ * The id embeds the current VU number and iteration counter, so it differs
+ * for every iteration of a given VU.
+ *
+ * @returns {string} JSON request body.
+ */
+function errorInjectionBody() {
+  const missingResponseID = `resp_not_found_${__VU}_${__ITER}`;
+  const userMessage = {
+    role: 'user',
+    content: [{ type: 'input_text', text: '触发错误注入路径。' }],
+  };
+  const payload = {
+    model,
+    stream: false,
+    previous_response_id: missingResponseID,
+    input: [userMessage],
+  };
+  return JSON.stringify(payload);
+}
+
+/**
+ * POSTs `body` to `${baseURL}/v1/responses` and records the shared metrics.
+ *
+ * For every call it adds the total duration, the waiting time (time to first
+ * byte, tracked here as TTFT) and whether the status fell outside 200-299,
+ * all tagged with `tags`.
+ *
+ * @param {string} apiKey - Bearer token forwarded to buildHeaders.
+ * @param {string} body - Serialized JSON request body.
+ * @param {Object<string, string>} tags - Tags for the request and the metric samples.
+ * @param {{sessionID?: string, conversationID?: string}} [opts] - Optional header ids.
+ * @returns {object} The k6 HTTP response.
+ */
+function postResponses(apiKey, body, tags, opts = {}) {
+  const endpoint = `${baseURL}/v1/responses`;
+  const params = {
+    headers: buildHeaders(apiKey, opts),
+    timeout,
+    tags,
+  };
+  const res = http.post(endpoint, body, params);
+
+  const { duration: totalMs, waiting: waitingMs } = res.timings;
+  reqDurationMs.add(totalMs, tags);
+  ttftMs.add(waitingMs, tags);
+
+  const outside2xx = res.status < 200 || res.status >= 300;
+  non2xxRate.add(outside2xx, tags);
+  return res;
+}
+
+/**
+ * Tells whether a response body contains the "[DONE]" marker.
+ *
+ * @param {{body?: string}|null|undefined} res - Response to inspect.
+ * @returns {boolean} False when the response or its body is missing or empty.
+ */
+function hasDone(res) {
+  if (!res || !res.body) {
+    return false;
+  }
+  return res.body.includes('[DONE]');
+}
+
+/**
+ * Scenario entry point for `short_request`: sends the minimal body with the
+ * regular API key, checks for a 2xx status, and counts the request as done
+ * when the body has "[DONE]" or the status is 2xx.
+ */
+export function runShortRequest() {
+  const scenarioTags = { scenario: 'short_request' };
+  const is2xx = (r) => r.status >= 200 && r.status < 300;
+
+  const res = postResponses(wsAPIKey, shortBody(), scenarioTags);
+  check(res, { 'short status is 2xx': is2xx });
+  doneRate.add(hasDone(res) || is2xx(res), scenarioTags);
+}
+
+/**
+ * Scenario entry point for `long_request`: sends the heavy body with the
+ * regular API key, checks for a 2xx status, and counts the request as done
+ * when the body has "[DONE]" or the status is 2xx.
+ */
+export function runLongRequest() {
+  const scenarioTags = { scenario: 'long_request' };
+  const is2xx = (r) => r.status >= 200 && r.status < 300;
+
+  const res = postResponses(wsAPIKey, longBody(), scenarioTags);
+  check(res, { 'long status is 2xx': is2xx });
+  doneRate.add(hasDone(res) || is2xx(res), scenarioTags);
+}
+
+/**
+ * Scenario entry point for `error_injection`: sends a request that refers to
+ * a non-existent previous response. It records whether the status was >= 400
+ * and whether the body contained "[DONE]"; no status check is asserted.
+ */
+export function runErrorInjection() {
+  const scenarioTags = { scenario: 'error_injection' };
+  const res = postResponses(wsAPIKey, errorInjectionBody(), scenarioTags);
+
+  // The error-injection scenario tolerates 4xx/5xx; the point is to observe
+  // the fallback behaviour and the jitter of the error path.
+  const gotError = res.status >= 400;
+  expectedErrorRate.add(gotError, scenarioTags);
+  doneRate.add(hasDone(res), scenarioTags);
+}
+
+/**
+ * Scenario entry point for `hotspot_account`: every iteration sends the
+ * minimal body with the hotspot API key and the same fixed session and
+ * conversation ids, so all traffic of this scenario shares one identity.
+ * It checks for a 2xx status and counts the request as done when the body
+ * has "[DONE]" or the status is 2xx.
+ *
+ * There is deliberately no trailing sleep: this function runs under a
+ * constant-arrival-rate executor, which already paces iteration starts, so
+ * sleeping would only keep the VU occupied longer.
+ */
+export function runHotspotAccount() {
+  const tags = { scenario: 'hotspot_account' };
+  const opts = {
+    sessionID: 'perf-hotspot-session-fixed',
+    conversationID: 'perf-hotspot-conversation-fixed',
+  };
+  const res = postResponses(wsHotspotAPIKey, shortBody(), tags, opts);
+  const is2xx = res.status >= 200 && res.status < 300;
+  check(res, { 'hotspot status is 2xx': (r) => r.status >= 200 && r.status < 300 });
+  doneRate.add(hasDone(res) || is2xx, tags);
+}
+
+/**
+ * k6 end-of-test hook. Prints a banner followed by the pretty-printed
+ * metrics to stdout, and writes the complete summary object as JSON to
+ * docs/perf/openai-ws-v2-perf-suite-summary.json.
+ *
+ * @param {{metrics: object}} data - Summary object supplied by k6.
+ * @returns {Object<string, string>} Map of output target to content.
+ */
+export function handleSummary(data) {
+  const metricsJSON = JSON.stringify(data.metrics, null, 2);
+  const fullJSON = JSON.stringify(data, null, 2);
+
+  const outputs = {};
+  outputs.stdout = `\nOpenAI WSv2 性能套件压测完成\n${metricsJSON}\n`;
+  outputs['docs/perf/openai-ws-v2-perf-suite-summary.json'] = fullJSON;
+  return outputs;
+}