feat(sync): full code sync from release

2026-02-28 15:01:20 +08:00
parent bfc7b339f7
commit bb664d9bbf
338 changed files with 54513 additions and 2011 deletions
--- a/tools/perf/openai_responses_ws_v2_compare_k6.js
+++ b/tools/perf/openai_responses_ws_v2_compare_k6.js
@@ -0,0 +1,167 @@
+import http from 'k6/http';
+import { check, sleep } from 'k6';
+import { Rate, Trend } from 'k6/metrics';
+
+// Runtime configuration, read from k6 environment variables with local defaults.
+// One trailing slash is stripped from the base URL so request paths can be appended directly.
+const baseURL = (__ENV.BASE_URL || 'http://127.0.0.1:5231').replace(/\/$/, '');
+// API keys are optional; surrounding whitespace is trimmed.
+const httpAPIKey = (__ENV.HTTP_API_KEY || '').trim();
+const wsAPIKey = (__ENV.WS_API_KEY || '').trim();
+const model = __ENV.MODEL || 'gpt-5.1';
+const duration = __ENV.DURATION || '5m';
+const timeout = __ENV.TIMEOUT || '180s';
+
+// Per-scenario arrival rates, the number of rounds in the chained scenario, and
+// VU pool sizes. Values go through Number(), so a non-numeric override becomes NaN.
+const httpRPS = Number(__ENV.HTTP_RPS || 10);
+const wsRPS = Number(__ENV.WS_RPS || 10);
+const chainRPS = Number(__ENV.CHAIN_RPS || 1);
+const chainRounds = Number(__ENV.CHAIN_ROUNDS || 20);
+const preAllocatedVUs = Number(__ENV.PRE_ALLOCATED_VUS || 40);
+const maxVUs = Number(__ENV.MAX_VUS || 300);
+
+// Custom metrics fed by the scenario functions below.
+// NOTE(review): the second Trend argument is presumably k6's `isTime` flag — confirm against the k6 docs.
+const httpDurationMs = new Trend('openai_http_req_duration_ms', true);
+const wsDurationMs = new Trend('openai_ws_req_duration_ms', true);
+const wsChainDurationMs = new Trend('openai_ws_chain_round_duration_ms', true);
+const wsChainTTFTMs = new Trend('openai_ws_chain_round_ttft_ms', true);
+const httpNon2xxRate = new Rate('openai_http_non2xx_rate');
+const wsNon2xxRate = new Rate('openai_ws_non2xx_rate');
+const wsChainRoundSuccessRate = new Rate('openai_ws_chain_round_success_rate');
+
+/**
+ * Builds one `constant-arrival-rate` scenario entry that runs for `duration`.
+ * @param {string} exec - Name of the exported function the scenario executes.
+ * @param {number} rate - Iterations started per `timeUnit` (one second).
+ * @param {number} initialVUs - Value used for `preAllocatedVUs`.
+ * @param {number} vuCeiling - Value used for `maxVUs`.
+ * @param {string} pathTag - Value of the `path` tag attached to the scenario.
+ * @returns {object} Scenario definition for `options.scenarios`.
+ */
+function arrivalRateScenario(exec, rate, initialVUs, vuCeiling, pathTag) {
+  return {
+    executor: 'constant-arrival-rate',
+    exec,
+    rate,
+    timeUnit: '1s',
+    duration,
+    preAllocatedVUs: initialVUs,
+    maxVUs: vuCeiling,
+    tags: { path: pathTag },
+  };
+}
+
+// k6 run configuration: two baseline scenarios, one chained scenario, and the
+// thresholds evaluated against the custom metrics.
+export const options = {
+  scenarios: {
+    http_baseline: arrivalRateScenario('runHTTPBaseline', httpRPS, preAllocatedVUs, maxVUs, 'http_baseline'),
+    ws_baseline: arrivalRateScenario('runWSBaseline', wsRPS, preAllocatedVUs, maxVUs, 'ws_baseline'),
+    // The chained scenario sizes its VU pool from its own rate instead of the shared settings.
+    ws_chain_20_rounds: arrivalRateScenario(
+      'runWSChain20Rounds',
+      chainRPS,
+      Math.max(2, Math.ceil(chainRPS * 2)),
+      Math.max(20, Math.ceil(chainRPS * 10)),
+      'ws_chain_20_rounds',
+    ),
+  },
+  thresholds: {
+    openai_http_non2xx_rate: ['rate<0.02'],
+    openai_ws_non2xx_rate: ['rate<0.02'],
+    openai_http_req_duration_ms: ['p(95)<4000', 'p(99)<7000'],
+    openai_ws_req_duration_ms: ['p(95)<3000', 'p(99)<6000'],
+    openai_ws_chain_round_success_rate: ['rate>0.98'],
+    openai_ws_chain_round_ttft_ms: ['p(99)<1200'],
+  },
+};
+
+/**
+ * Assembles the HTTP headers sent with every request.
+ * @param {string} apiKey - Bearer token; a falsy value leaves `Authorization` unset.
+ * @returns {Object<string, string>} Header map.
+ */
+function buildHeaders(apiKey) {
+  const base = {
+    'Content-Type': 'application/json',
+    'User-Agent': 'codex_cli_rs/0.98.0',
+  };
+  return apiKey ? Object.assign(base, { Authorization: `Bearer ${apiKey}` }) : base;
+}
+
+/**
+ * Serializes the JSON request body for a single non-streaming request.
+ * @param {string} previousResponseID - When truthy, sent as `previous_response_id`.
+ * @returns {string} JSON-encoded request body.
+ */
+function buildBody(previousResponseID) {
+  const userMessage = {
+    role: 'user',
+    content: [{ type: 'input_text', text: '请回复一个单词: pong' }],
+  };
+  const payload = {
+    model,
+    stream: false,
+    input: [userMessage],
+    max_output_tokens: 64,
+  };
+  if (previousResponseID) {
+    payload.previous_response_id = previousResponseID;
+  }
+  return JSON.stringify(payload);
+}
+
+/**
+ * POSTs `body` to `${baseURL}/v1/responses` and runs a 2xx status check on the result.
+ * @param {string} apiKey - Key forwarded to `buildHeaders`.
+ * @param {string} body - Serialized request body.
+ * @param {object} tags - Tags attached to the request.
+ * @returns {object} The response returned by `http.post`.
+ */
+function postResponses(apiKey, body, tags) {
+  const endpoint = `${baseURL}/v1/responses`;
+  const params = { headers: buildHeaders(apiKey), timeout, tags };
+  const response = http.post(endpoint, body, params);
+  const statusIs2xx = (r) => r.status >= 200 && r.status < 300;
+  check(response, { 'status is 2xx': statusIs2xx });
+  return response;
+}
+
+/**
+ * Extracts the top-level string `id` from a JSON response body.
+ * @param {object} res - Response-like object with a `body` string.
+ * @returns {string} The trimmed ID, or '' when the body is missing, is not valid
+ *   JSON, or carries no string `id`.
+ */
+function parseResponseID(res) {
+  const raw = res ? res.body : null;
+  if (!raw) {
+    return '';
+  }
+  let parsed;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (_) {
+    // An unparseable body is treated the same as a missing ID.
+    return '';
+  }
+  const hasStringID = Boolean(parsed) && typeof parsed.id === 'string';
+  return hasStringID ? parsed.id.trim() : '';
+}
+
+/**
+ * HTTP baseline scenario: one request using the HTTP API key, recording its
+ * duration and whether the status fell outside 2xx.
+ */
+export function runHTTPBaseline() {
+  const res = postResponses(httpAPIKey, buildBody(''), { transport: 'http' });
+  const elapsedMs = res.timings.duration;
+  const isNon2xx = res.status < 200 || res.status >= 300;
+  httpDurationMs.add(elapsedMs, { transport: 'http' });
+  httpNon2xxRate.add(isNon2xx, { transport: 'http' });
+}
+
+/**
+ * WS baseline scenario: one request using the WS API key, recording its
+ * duration and whether the status fell outside 2xx.
+ */
+export function runWSBaseline() {
+  const res = postResponses(wsAPIKey, buildBody(''), { transport: 'ws_v2' });
+  const elapsedMs = res.timings.duration;
+  const isNon2xx = res.status < 200 || res.status >= 300;
+  wsDurationMs.add(elapsedMs, { transport: 'ws_v2' });
+  wsNon2xxRate.add(isNon2xx, { transport: 'ws_v2' });
+}
+
+/**
+ * Dedicated 20+ round chained-continuation scenario: checks the stability and
+ * latency of `previous_response_id` over a long chain.
+ *
+ * Sends up to `chainRounds` sequential requests, each referencing the response
+ * ID parsed from the previous round. Every round contributes exactly ONE sample
+ * to the round success rate: a round succeeds only when the status is 2xx and a
+ * response ID can be parsed from the body. The chain stops at the first failed
+ * round.
+ */
+export function runWSChain20Rounds() {
+  let previousResponseID = '';
+  for (let round = 1; round <= chainRounds; round += 1) {
+    const roundTags = { round: `${round}` };
+    const roundStart = Date.now();
+    const res = postResponses(wsAPIKey, buildBody(previousResponseID), { transport: 'ws_v2_chain' });
+    const roundElapsedMs = Date.now() - roundStart;
+    const ok = res.status >= 200 && res.status < 300;
+    // Only successful responses are inspected for an ID.
+    const respID = ok ? parseResponseID(res) : '';
+
+    wsChainDurationMs.add(roundElapsedMs, roundTags);
+    wsChainTTFTMs.add(res.timings.waiting, roundTags);
+    wsNon2xxRate.add(!ok, { transport: 'ws_v2_chain' });
+
+    if (!ok) {
+      wsChainRoundSuccessRate.add(false, roundTags);
+      return;
+    }
+    if (!respID) {
+      // A 2xx without a usable ID cannot be continued. Record it once, as a
+      // failure, instead of first counting the round as a success.
+      wsChainRoundSuccessRate.add(false, { round: `${round}`, reason: 'missing_response_id' });
+      return;
+    }
+    wsChainRoundSuccessRate.add(true, roundTags);
+    previousResponseID = respID;
+    sleep(0.01);
+  }
+}
+
+/**
+ * k6 end-of-test hook: prints the metrics as JSON and writes the full summary
+ * object to a JSON file.
+ * @param {object} data - Summary object passed in by k6.
+ * @returns {Object<string, string>} Map of output target to content.
+ */
+export function handleSummary(data) {
+  const metricsJSON = JSON.stringify(data.metrics, null, 2);
+  const report = {};
+  report.stdout = `\nOpenAI WSv2 对比压测完成\n${metricsJSON}\n`;
+  report['docs/perf/openai-ws-v2-compare-summary.json'] = JSON.stringify(data, null, 2);
+  return report;
+}
--- a/tools/perf/openai_ws_pooling_compare_k6.js
+++ b/tools/perf/openai_ws_pooling_compare_k6.js
@@ -0,0 +1,123 @@
+import http from 'k6/http';
+import { check } from 'k6';
+import { Rate, Trend } from 'k6/metrics';
+
+// Runtime configuration, read from k6 environment variables.
+// One trailing slash is stripped from each base URL. The one-to-one URL has no
+// default, so it is an empty string unless ONE_TO_ONE_BASE_URL is set.
+const pooledBaseURL = (__ENV.POOLED_BASE_URL || 'http://127.0.0.1:5231').replace(/\/$/, '');
+const oneToOneBaseURL = (__ENV.ONE_TO_ONE_BASE_URL || '').replace(/\/$/, '');
+// The API key is optional; surrounding whitespace is trimmed.
+const wsAPIKey = (__ENV.WS_API_KEY || '').trim();
+const model = __ENV.MODEL || 'gpt-5.1';
+const timeout = __ENV.TIMEOUT || '180s';
+const duration = __ENV.DURATION || '5m';
+// Per-mode arrival rates and shared VU pool sizes. Values go through Number(),
+// so a non-numeric override becomes NaN.
+const pooledRPS = Number(__ENV.POOLED_RPS || 12);
+const oneToOneRPS = Number(__ENV.ONE_TO_ONE_RPS || 12);
+const preAllocatedVUs = Number(__ENV.PRE_ALLOCATED_VUS || 50);
+const maxVUs = Number(__ENV.MAX_VUS || 400);
+
+// Custom metrics, one duration/TTFT/non-2xx set per mode.
+// NOTE(review): the second Trend argument is presumably k6's `isTime` flag — confirm against the k6 docs.
+const pooledDurationMs = new Trend('openai_ws_pooled_duration_ms', true);
+const oneToOneDurationMs = new Trend('openai_ws_one_to_one_duration_ms', true);
+const pooledTTFTMs = new Trend('openai_ws_pooled_ttft_ms', true);
+const oneToOneTTFTMs = new Trend('openai_ws_one_to_one_ttft_ms', true);
+const pooledNon2xxRate = new Rate('openai_ws_pooled_non2xx_rate');
+const oneToOneNon2xxRate = new Rate('openai_ws_one_to_one_non2xx_rate');
+
+/**
+ * Builds one `constant-arrival-rate` scenario entry using the shared duration
+ * and VU pool settings.
+ * @param {string} exec - Name of the exported function the scenario executes.
+ * @param {number} rate - Iterations started per `timeUnit` (one second).
+ * @param {string} mode - Value of the `mode` tag attached to the scenario.
+ * @returns {object} Scenario definition for `options.scenarios`.
+ */
+function modeScenario(exec, rate, mode) {
+  return {
+    executor: 'constant-arrival-rate',
+    exec,
+    rate,
+    timeUnit: '1s',
+    duration,
+    preAllocatedVUs,
+    maxVUs,
+    tags: { mode },
+  };
+}
+
+// k6 run configuration: one scenario per connection mode plus per-mode thresholds.
+export const options = {
+  scenarios: {
+    pooled_mode: modeScenario('runPooledMode', pooledRPS, 'pooled'),
+    // The one-to-one scenario starts five seconds after the pooled one.
+    one_to_one_mode: Object.assign(modeScenario('runOneToOneMode', oneToOneRPS, 'one_to_one'), {
+      startTime: '5s',
+    }),
+  },
+  thresholds: {
+    openai_ws_pooled_non2xx_rate: ['rate<0.02'],
+    openai_ws_one_to_one_non2xx_rate: ['rate<0.02'],
+    openai_ws_pooled_duration_ms: ['p(95)<3000', 'p(99)<6000'],
+    openai_ws_one_to_one_duration_ms: ['p(95)<6000', 'p(99)<10000'],
+  },
+};
+
+/**
+ * Assembles the HTTP headers sent with every request; `Authorization` is only
+ * included when `wsAPIKey` is non-empty.
+ * @returns {Object<string, string>} Header map.
+ */
+function buildHeaders() {
+  const base = {
+    'Content-Type': 'application/json',
+    'User-Agent': 'codex_cli_rs/0.98.0',
+  };
+  return wsAPIKey ? Object.assign(base, { Authorization: `Bearer ${wsAPIKey}` }) : base;
+}
+
+/**
+ * Serializes the JSON request body for a single non-streaming request.
+ * @returns {string} JSON-encoded request body.
+ */
+function buildBody() {
+  const userMessage = {
+    role: 'user',
+    content: [{ type: 'input_text', text: '请回复: pong' }],
+  };
+  const payload = {
+    model,
+    stream: false,
+    input: [userMessage],
+    max_output_tokens: 48,
+  };
+  return JSON.stringify(payload);
+}
+
+/**
+ * POSTs one request to `${baseURL}/v1/responses` and runs a 2xx status check.
+ * @param {string} baseURL - Target base URL; a falsy value skips the request.
+ * @param {string} mode - Value of the `mode` tag attached to the request.
+ * @returns {object|null} The response, or null when no base URL was given.
+ */
+function send(baseURL, mode) {
+  if (!baseURL) {
+    return null;
+  }
+  const endpoint = `${baseURL}/v1/responses`;
+  const params = { headers: buildHeaders(), timeout, tags: { mode } };
+  const response = http.post(endpoint, buildBody(), params);
+  const statusIs2xx = (r) => r.status >= 200 && r.status < 300;
+  check(response, { 'status is 2xx': statusIs2xx });
+  return response;
+}
+
+/**
+ * Pooled-mode scenario: sends one request to the pooled base URL and records
+ * duration, `timings.waiting`, and whether the status fell outside 2xx.
+ */
+export function runPooledMode() {
+  const res = send(pooledBaseURL, 'pooled');
+  if (res) {
+    const isNon2xx = res.status < 200 || res.status >= 300;
+    pooledDurationMs.add(res.timings.duration, { mode: 'pooled' });
+    pooledTTFTMs.add(res.timings.waiting, { mode: 'pooled' });
+    pooledNon2xxRate.add(isNon2xx, { mode: 'pooled' });
+  }
+}
+
+/**
+ * One-to-one-mode scenario: sends one request to the one-to-one base URL and
+ * records duration, `timings.waiting`, and whether the status fell outside 2xx.
+ * Does nothing when no one-to-one base URL is configured (`send` returns null).
+ */
+export function runOneToOneMode() {
+  const res = send(oneToOneBaseURL, 'one_to_one');
+  if (res) {
+    const isNon2xx = res.status < 200 || res.status >= 300;
+    oneToOneDurationMs.add(res.timings.duration, { mode: 'one_to_one' });
+    oneToOneTTFTMs.add(res.timings.waiting, { mode: 'one_to_one' });
+    oneToOneNon2xxRate.add(isNon2xx, { mode: 'one_to_one' });
+  }
+}
+
+/**
+ * k6 end-of-test hook: prints the metrics as JSON and writes the full summary
+ * object to a JSON file.
+ * @param {object} data - Summary object passed in by k6.
+ * @returns {Object<string, string>} Map of output target to content.
+ */
+export function handleSummary(data) {
+  const metricsJSON = JSON.stringify(data.metrics, null, 2);
+  const report = {};
+  report.stdout = `\nOpenAI WS 池化 vs 1:1 对比压测完成\n${metricsJSON}\n`;
+  report['docs/perf/openai-ws-pooling-compare-summary.json'] = JSON.stringify(data, null, 2);
+  return report;
+}
--- a/tools/perf/openai_ws_v2_perf_suite_k6.js
+++ b/tools/perf/openai_ws_v2_perf_suite_k6.js
@@ -0,0 +1,216 @@
+import http from 'k6/http';
+import { check, sleep } from 'k6';
+import { Rate, Trend } from 'k6/metrics';
+
+// Runtime configuration, read from k6 environment variables with local defaults.
+// One trailing slash is stripped from the base URL so request paths can be appended directly.
+const baseURL = (__ENV.BASE_URL || 'http://127.0.0.1:5231').replace(/\/$/, '');
+const wsAPIKey = (__ENV.WS_API_KEY || '').trim();
+// The hotspot scenario falls back to the regular WS key when no dedicated key is set.
+const wsHotspotAPIKey = (__ENV.WS_HOTSPOT_API_KEY || wsAPIKey).trim();
+const model = __ENV.MODEL || 'gpt-5.3-codex';
+const duration = __ENV.DURATION || '5m';
+const timeout = __ENV.TIMEOUT || '180s';
+
+// Per-scenario arrival rates and shared VU pool sizes. Values go through
+// Number(), so a non-numeric override becomes NaN.
+const shortRPS = Number(__ENV.SHORT_RPS || 12);
+const longRPS = Number(__ENV.LONG_RPS || 4);
+const errorRPS = Number(__ENV.ERROR_RPS || 2);
+const hotspotRPS = Number(__ENV.HOTSPOT_RPS || 10);
+const preAllocatedVUs = Number(__ENV.PRE_ALLOCATED_VUS || 50);
+const maxVUs = Number(__ENV.MAX_VUS || 400);
+
+// Custom metrics shared by all scenarios in this suite.
+// NOTE(review): the second Trend argument is presumably k6's `isTime` flag — confirm against the k6 docs.
+const reqDurationMs = new Trend('openai_ws_v2_perf_req_duration_ms', true);
+const ttftMs = new Trend('openai_ws_v2_perf_ttft_ms', true);
+const non2xxRate = new Rate('openai_ws_v2_perf_non2xx_rate');
+const doneRate = new Rate('openai_ws_v2_perf_done_rate');
+const expectedErrorRate = new Rate('openai_ws_v2_perf_expected_error_rate');
+
+/**
+ * Builds one `constant-arrival-rate` scenario entry that runs for `duration`.
+ * @param {string} name - Value of the `scenario` tag attached to the scenario.
+ * @param {string} exec - Name of the exported function the scenario executes.
+ * @param {number} rate - Iterations started per `timeUnit` (one second).
+ * @param {number} initialVUs - Value used for `preAllocatedVUs`.
+ * @param {number} vuCeiling - Value used for `maxVUs`.
+ * @returns {object} Scenario definition for `options.scenarios`.
+ */
+function perfScenario(name, exec, rate, initialVUs, vuCeiling) {
+  return {
+    executor: 'constant-arrival-rate',
+    exec,
+    rate,
+    timeUnit: '1s',
+    duration,
+    preAllocatedVUs: initialVUs,
+    maxVUs: vuCeiling,
+    tags: { scenario: name },
+  };
+}
+
+// k6 run configuration: four scenarios plus thresholds on the shared metrics.
+// Only the short-request scenario uses the shared VU pool settings; the others
+// derive their pool sizes from their own arrival rate.
+export const options = {
+  scenarios: {
+    short_request: perfScenario('short_request', 'runShortRequest', shortRPS, preAllocatedVUs, maxVUs),
+    long_request: perfScenario(
+      'long_request',
+      'runLongRequest',
+      longRPS,
+      Math.max(20, Math.ceil(longRPS * 6)),
+      Math.max(100, Math.ceil(longRPS * 20)),
+    ),
+    error_injection: perfScenario(
+      'error_injection',
+      'runErrorInjection',
+      errorRPS,
+      Math.max(8, Math.ceil(errorRPS * 4)),
+      Math.max(40, Math.ceil(errorRPS * 12)),
+    ),
+    hotspot_account: perfScenario(
+      'hotspot_account',
+      'runHotspotAccount',
+      hotspotRPS,
+      Math.max(16, Math.ceil(hotspotRPS * 3)),
+      Math.max(80, Math.ceil(hotspotRPS * 10)),
+    ),
+  },
+  thresholds: {
+    openai_ws_v2_perf_non2xx_rate: ['rate<0.05'],
+    openai_ws_v2_perf_req_duration_ms: ['p(95)<5000', 'p(99)<9000'],
+    openai_ws_v2_perf_ttft_ms: ['p(99)<2000'],
+    openai_ws_v2_perf_done_rate: ['rate>0.95'],
+  },
+};
+
+/**
+ * Assembles the HTTP headers for a request.
+ * @param {string} apiKey - Bearer token; a falsy value leaves `Authorization` unset.
+ * @param {object} [opts] - Optional identifiers.
+ * @param {string} [opts.sessionID] - Sent as the `session_id` header when truthy.
+ * @param {string} [opts.conversationID] - Sent as the `conversation_id` header when truthy.
+ * @returns {Object<string, string>} Header map.
+ */
+function buildHeaders(apiKey, opts = {}) {
+  const headers = {
+    'Content-Type': 'application/json',
+    'User-Agent': 'codex_cli_rs/0.104.0',
+    'OpenAI-Beta': 'responses_websockets=2026-02-06,responses=experimental',
+  };
+  // Optional headers in insertion order; entries with a falsy value are skipped.
+  const optionalHeaders = [
+    ['Authorization', apiKey ? `Bearer ${apiKey}` : ''],
+    ['session_id', opts.sessionID],
+    ['conversation_id', opts.conversationID],
+  ];
+  for (const [headerName, headerValue] of optionalHeaders) {
+    if (headerValue) {
+      headers[headerName] = headerValue;
+    }
+  }
+  return headers;
+}
+
+/**
+ * Serializes the JSON body of a short, single-message, non-streaming request.
+ * @returns {string} JSON-encoded request body.
+ */
+function shortBody() {
+  const userMessage = {
+    role: 'user',
+    content: [{ type: 'input_text', text: '请回复一个词：pong' }],
+  };
+  const payload = {
+    model,
+    stream: false,
+    input: [userMessage],
+    max_output_tokens: 64,
+  };
+  return JSON.stringify(payload);
+}
+
+/**
+ * Serializes the JSON body of a large non-streaming request: 28 function-tool
+ * definitions and 20 user messages.
+ * @returns {string} JSON-encoded request body.
+ */
+function longBody() {
+  const tools = Array.from({ length: 28 }, (_, toolIndex) => ({
+    type: 'function',
+    name: `perf_tool_${toolIndex}`,
+    description: 'load test tool schema',
+    parameters: {
+      type: 'object',
+      properties: {
+        query: { type: 'string' },
+        limit: { type: 'number' },
+        with_cache: { type: 'boolean' },
+      },
+      required: ['query'],
+    },
+  }));
+
+  const input = Array.from({ length: 20 }, (_, messageIndex) => ({
+    role: 'user',
+    content: [{ type: 'input_text', text: `长请求压测消息 ${messageIndex}: 请输出简要摘要。` }],
+  }));
+
+  const payload = {
+    model,
+    stream: false,
+    input,
+    tools,
+    parallel_tool_calls: true,
+    max_output_tokens: 256,
+    reasoning: { effort: 'medium' },
+    instructions: '你是压测助手，简洁回复。',
+  };
+  return JSON.stringify(payload);
+}
+
+/**
+ * Serializes a non-streaming request body whose `previous_response_id` is a
+ * made-up value derived from the current VU and iteration numbers.
+ * @returns {string} JSON-encoded request body.
+ */
+function errorInjectionBody() {
+  const fabricatedID = `resp_not_found_${__VU}_${__ITER}`;
+  const userMessage = {
+    role: 'user',
+    content: [{ type: 'input_text', text: '触发错误注入路径。' }],
+  };
+  const payload = {
+    model,
+    stream: false,
+    previous_response_id: fabricatedID,
+    input: [userMessage],
+  };
+  return JSON.stringify(payload);
+}
+
+/**
+ * POSTs `body` to `${baseURL}/v1/responses` and records the request duration,
+ * `timings.waiting`, and non-2xx outcome on the shared metrics.
+ * @param {string} apiKey - Key forwarded to `buildHeaders`.
+ * @param {string} body - Serialized request body.
+ * @param {object} tags - Tags attached to the request and to each metric sample.
+ * @param {object} [opts] - Optional header identifiers forwarded to `buildHeaders`.
+ * @returns {object} The response returned by `http.post`.
+ */
+function postResponses(apiKey, body, tags, opts = {}) {
+  const params = { headers: buildHeaders(apiKey, opts), timeout, tags };
+  const res = http.post(`${baseURL}/v1/responses`, body, params);
+  const isNon2xx = res.status < 200 || res.status >= 300;
+  reqDurationMs.add(res.timings.duration, tags);
+  ttftMs.add(res.timings.waiting, tags);
+  non2xxRate.add(isNon2xx, tags);
+  return res;
+}
+
+/**
+ * Reports whether the response body contains the literal marker `[DONE]`.
+ * @param {object} res - Response-like object with a `body` string.
+ * @returns {boolean} True only when a non-empty body includes `[DONE]`.
+ */
+function hasDone(res) {
+  if (!res || !res.body) {
+    return false;
+  }
+  return res.body.includes('[DONE]');
+}
+
+/**
+ * Short-request scenario: sends one short request, checks for a 2xx status, and
+ * records a done-rate sample that is true when the body contains `[DONE]` or the
+ * status is 2xx.
+ */
+export function runShortRequest() {
+  const tags = { scenario: 'short_request' };
+  const res = postResponses(wsAPIKey, shortBody(), tags);
+  const statusIs2xx = (r) => r.status >= 200 && r.status < 300;
+  check(res, { 'short status is 2xx': statusIs2xx });
+  const completed = hasDone(res) || statusIs2xx(res);
+  doneRate.add(completed, tags);
+}
+
+/**
+ * Long-request scenario: sends one large request, checks for a 2xx status, and
+ * records a done-rate sample that is true when the body contains `[DONE]` or the
+ * status is 2xx.
+ */
+export function runLongRequest() {
+  const tags = { scenario: 'long_request' };
+  const res = postResponses(wsAPIKey, longBody(), tags);
+  const statusIs2xx = (r) => r.status >= 200 && r.status < 300;
+  check(res, { 'long status is 2xx': statusIs2xx });
+  const completed = hasDone(res) || statusIs2xx(res);
+  doneRate.add(completed, tags);
+}
+
+/**
+ * Error-injection scenario: sends a request that references a made-up
+ * `previous_response_id` and records whether the status was >= 400.
+ *
+ * NOTE(review): these requests also feed the shared non-2xx rate (inside
+ * `postResponses`) and the done rate (below). At the default rates this scenario
+ * is 2 of 28 req/s, which alone could exceed the 5% budgets of both thresholds
+ * if every injected request fails without a `[DONE]` marker — confirm intended.
+ */
+export function runErrorInjection() {
+  const tags = { scenario: 'error_injection' };
+  const res = postResponses(wsAPIKey, errorInjectionBody(), tags);
+  // The error-injection scenario tolerates 4xx/5xx; the focus is on observing
+  // fallback behaviour and jitter on the error path.
+  const gotErrorStatus = res.status >= 400;
+  expectedErrorRate.add(gotErrorStatus, tags);
+  const sawDoneMarker = hasDone(res);
+  doneRate.add(sawDoneMarker, tags);
+}
+
+/**
+ * Hotspot scenario: every iteration sends a short request with the hotspot API
+ * key and the same fixed session and conversation IDs, then checks for a 2xx
+ * status, records a done-rate sample, and pauses briefly.
+ */
+export function runHotspotAccount() {
+  const tags = { scenario: 'hotspot_account' };
+  const fixedIdentity = {
+    sessionID: 'perf-hotspot-session-fixed',
+    conversationID: 'perf-hotspot-conversation-fixed',
+  };
+  const res = postResponses(wsHotspotAPIKey, shortBody(), tags, fixedIdentity);
+  const statusIs2xx = (r) => r.status >= 200 && r.status < 300;
+  check(res, { 'hotspot status is 2xx': statusIs2xx });
+  const completed = hasDone(res) || statusIs2xx(res);
+  doneRate.add(completed, tags);
+  sleep(0.01);
+}
+
+/**
+ * k6 end-of-test hook: prints the metrics as JSON and writes the full summary
+ * object to a JSON file.
+ * @param {object} data - Summary object passed in by k6.
+ * @returns {Object<string, string>} Map of output target to content.
+ */
+export function handleSummary(data) {
+  const metricsJSON = JSON.stringify(data.metrics, null, 2);
+  const report = {};
+  report.stdout = `\nOpenAI WSv2 性能套件压测完成\n${metricsJSON}\n`;
+  report['docs/perf/openai-ws-v2-perf-suite-summary.json'] = JSON.stringify(data, null, 2);
+  return report;
+}