feat(sync): full code sync from release

This commit is contained in:
yangjianbo
2026-02-28 15:01:20 +08:00
parent bfc7b339f7
commit bb664d9bbf
338 changed files with 54513 additions and 2011 deletions

View File

@@ -0,0 +1,167 @@
import http from 'k6/http';
import { check, sleep } from 'k6';
import { Rate, Trend } from 'k6/metrics';
// Environment lookups used by every setting below: an unset or empty variable
// yields the supplied fallback.
const envText = (name, fallback) => __ENV[name] || fallback;
const envNumber = (name, fallback) => Number(__ENV[name] || fallback);

// Target endpoint and credentials. One trailing slash is removed from the base
// URL; both API keys are trimmed and may end up empty.
const baseURL = envText('BASE_URL', 'http://127.0.0.1:5231').replace(/\/$/, '');
const httpAPIKey = envText('HTTP_API_KEY', '').trim();
const wsAPIKey = envText('WS_API_KEY', '').trim();

// Request model, scenario duration and per-request timeout.
const model = envText('MODEL', 'gpt-5.1');
const duration = envText('DURATION', '5m');
const timeout = envText('TIMEOUT', '180s');

// Arrival rates (iterations per second) and the number of chained rounds.
const httpRPS = envNumber('HTTP_RPS', 10);
const wsRPS = envNumber('WS_RPS', 10);
const chainRPS = envNumber('CHAIN_RPS', 1);
const chainRounds = envNumber('CHAIN_ROUNDS', 20);

// VU pool shared by the two baseline scenarios.
const preAllocatedVUs = envNumber('PRE_ALLOCATED_VUS', 40);
const maxVUs = envNumber('MAX_VUS', 300);

// Custom metrics. Each Trend is created with its second constructor argument
// set to true.
const httpDurationMs = new Trend('openai_http_req_duration_ms', true);
const wsDurationMs = new Trend('openai_ws_req_duration_ms', true);
const wsChainDurationMs = new Trend('openai_ws_chain_round_duration_ms', true);
const wsChainTTFTMs = new Trend('openai_ws_chain_round_ttft_ms', true);
const httpNon2xxRate = new Rate('openai_http_non2xx_rate');
const wsNon2xxRate = new Rate('openai_ws_non2xx_rate');
const wsChainRoundSuccessRate = new Rate('openai_ws_chain_round_success_rate');
/**
 * k6 options: three constant-arrival-rate scenarios (HTTP baseline, WS
 * baseline, chained rounds) and the pass/fail thresholds on the custom metrics.
 */
export const options = (() => {
  // Only the exec function, the rate, the VU limits and the `path` tag differ
  // between scenarios; everything else is shared.
  const arrivalRateScenario = (exec, rate, path, vuLimits) => ({
    executor: 'constant-arrival-rate',
    exec,
    rate,
    timeUnit: '1s',
    duration,
    preAllocatedVUs: vuLimits.preAllocatedVUs,
    maxVUs: vuLimits.maxVUs,
    tags: { path },
  });

  const baselineLimits = { preAllocatedVUs, maxVUs };
  // The chain scenario sizes its own VU pool from its arrival rate.
  const chainLimits = {
    preAllocatedVUs: Math.max(2, Math.ceil(chainRPS * 2)),
    maxVUs: Math.max(20, Math.ceil(chainRPS * 10)),
  };

  return {
    scenarios: {
      http_baseline: arrivalRateScenario('runHTTPBaseline', httpRPS, 'http_baseline', baselineLimits),
      ws_baseline: arrivalRateScenario('runWSBaseline', wsRPS, 'ws_baseline', baselineLimits),
      ws_chain_20_rounds: arrivalRateScenario('runWSChain20Rounds', chainRPS, 'ws_chain_20_rounds', chainLimits),
    },
    thresholds: {
      openai_http_non2xx_rate: ['rate<0.02'],
      openai_ws_non2xx_rate: ['rate<0.02'],
      openai_http_req_duration_ms: ['p(95)<4000', 'p(99)<7000'],
      openai_ws_req_duration_ms: ['p(95)<3000', 'p(99)<6000'],
      openai_ws_chain_round_success_rate: ['rate>0.98'],
      openai_ws_chain_round_ttft_ms: ['p(99)<1200'],
    },
  };
})();
/**
 * Builds the request headers.
 *
 * @param {string} apiKey - Bearer token; when falsy no Authorization header is set.
 * @returns {Object} Header map with Content-Type and User-Agent, plus Authorization if a key was given.
 */
function buildHeaders(apiKey) {
  const fixedHeaders = {
    'Content-Type': 'application/json',
    'User-Agent': 'codex_cli_rs/0.98.0',
  };
  return apiKey
    ? Object.assign(fixedHeaders, { Authorization: `Bearer ${apiKey}` })
    : fixedHeaders;
}
/**
 * Serializes the request payload: a single user message, non-streaming, with
 * max_output_tokens of 64.
 *
 * @param {string} previousResponseID - When truthy, sent as `previous_response_id`.
 * @returns {string} JSON-encoded request body.
 */
function buildBody(previousResponseID) {
  const userMessage = {
    role: 'user',
    content: [{ type: 'input_text', text: '请回复一个单词: pong' }],
  };
  const payload = {
    model,
    stream: false,
    input: [userMessage],
    max_output_tokens: 64,
  };
  // The chaining field is appended last, and only when an id was supplied.
  const chainField = previousResponseID ? { previous_response_id: previousResponseID } : {};
  return JSON.stringify(Object.assign(payload, chainField));
}
/**
 * POSTs `body` to `${baseURL}/v1/responses` and runs a 2xx check on the result.
 *
 * @param {string} apiKey - Passed to buildHeaders for the Authorization header.
 * @param {string} body - Serialized request payload.
 * @param {Object} tags - Tags attached to the HTTP request.
 * @returns {Object} The k6 response object, regardless of status.
 */
function postResponses(apiKey, body, tags) {
  const endpoint = `${baseURL}/v1/responses`;
  const requestParams = { headers: buildHeaders(apiKey), timeout, tags };
  const res = http.post(endpoint, body, requestParams);
  const statusIs2xx = (r) => r.status >= 200 && r.status < 300;
  check(res, { 'status is 2xx': statusIs2xx });
  return res;
}
/**
 * Extracts the trimmed top-level string `id` from a JSON response body.
 *
 * @param {Object} res - Response-like object with a `body` property.
 * @returns {string} The trimmed id, or '' when there is no body, the body is
 *   not valid JSON, or `id` is not a string.
 */
function parseResponseID(res) {
  if (!res || !res.body) {
    return '';
  }
  let decoded;
  try {
    decoded = JSON.parse(res.body);
  } catch (_) {
    // An unparsable body is treated the same as a missing id.
    return '';
  }
  const hasStringID = Boolean(decoded) && typeof decoded.id === 'string';
  return hasStringID ? decoded.id.trim() : '';
}
/**
 * Scenario entry point: sends one request with the HTTP API key and records
 * its total duration and whether the status was outside 2xx.
 */
export function runHTTPBaseline() {
  const metricTags = { transport: 'http' };
  const { status, timings } = postResponses(httpAPIKey, buildBody(''), { transport: 'http' });
  const isNon2xx = status < 200 || status >= 300;
  httpDurationMs.add(timings.duration, metricTags);
  httpNon2xxRate.add(isNon2xx, metricTags);
}
/**
 * Scenario entry point: sends one request with the WS API key and records its
 * total duration and whether the status was outside 2xx, tagged `ws_v2`.
 */
export function runWSBaseline() {
  const metricTags = { transport: 'ws_v2' };
  const { status, timings } = postResponses(wsAPIKey, buildBody(''), { transport: 'ws_v2' });
  const isNon2xx = status < 200 || status >= 300;
  wsDurationMs.add(timings.duration, metricTags);
  wsNon2xxRate.add(isNon2xx, metricTags);
}
// Dedicated 20+ round continuation-chain scenario: exercises
// previous_response_id across a long chain to observe stability and latency.
/**
 * Runs up to `chainRounds` sequential requests, feeding each response id into
 * the next request as `previous_response_id`.
 *
 * Per round it records the wall-clock duration, `res.timings.waiting` (into
 * the TTFT trend), the non-2xx flag, and exactly ONE sample in
 * `wsChainRoundSuccessRate`. A round succeeds only when the status is 2xx AND
 * a response id could be parsed. Previously a 2xx round without an id was
 * recorded twice (true, then false), which inflated the sample count and
 * diluted the failure. The chain stops at the first failed round because there
 * is no id to continue from.
 */
export function runWSChain20Rounds() {
  let previousResponseID = '';
  for (let round = 1; round <= chainRounds; round += 1) {
    const roundLabel = `${round}`;
    const roundStart = Date.now();
    const res = postResponses(wsAPIKey, buildBody(previousResponseID), { transport: 'ws_v2_chain' });
    const roundElapsedMs = Date.now() - roundStart;

    const statusOK = res.status >= 200 && res.status < 300;
    // Only a successful response is inspected for an id.
    const respID = statusOK ? parseResponseID(res) : '';

    wsChainDurationMs.add(roundElapsedMs, { round: roundLabel });
    wsChainTTFTMs.add(res.timings.waiting, { round: roundLabel });
    wsNon2xxRate.add(!statusOK, { transport: 'ws_v2_chain' });

    if (!statusOK) {
      wsChainRoundSuccessRate.add(false, { round: roundLabel });
      return;
    }
    if (!respID) {
      // 2xx but nothing to chain from: count the round once, as a failure.
      wsChainRoundSuccessRate.add(false, { round: roundLabel, reason: 'missing_response_id' });
      return;
    }

    wsChainRoundSuccessRate.add(true, { round: roundLabel });
    previousResponseID = respID;
    sleep(0.01);
  }
}
/**
 * k6 end-of-test hook: prints the metrics as pretty JSON on stdout and maps a
 * summary file path to the full serialized `data`.
 *
 * @param {Object} data - Summary object provided by k6.
 * @returns {Object} Map of output target to content.
 */
export function handleSummary(data) {
  const metricsJSON = JSON.stringify(data.metrics, null, 2);
  const outputs = {};
  outputs.stdout = `\nOpenAI WSv2 对比压测完成\n${metricsJSON}\n`;
  outputs['docs/perf/openai-ws-v2-compare-summary.json'] = JSON.stringify(data, null, 2);
  return outputs;
}

View File

@@ -0,0 +1,123 @@
import http from 'k6/http';
import { check } from 'k6';
import { Rate, Trend } from 'k6/metrics';
// Environment lookups used by every setting below: an unset or empty variable
// yields the supplied fallback.
const envText = (name, fallback) => __ENV[name] || fallback;
const envNumber = (name, fallback) => Number(__ENV[name] || fallback);

// Endpoints for the two modes under comparison, each with one trailing slash
// removed. The one-to-one URL defaults to empty.
const pooledBaseURL = envText('POOLED_BASE_URL', 'http://127.0.0.1:5231').replace(/\/$/, '');
const oneToOneBaseURL = envText('ONE_TO_ONE_BASE_URL', '').replace(/\/$/, '');

// Credentials, model, per-request timeout and scenario duration.
const wsAPIKey = envText('WS_API_KEY', '').trim();
const model = envText('MODEL', 'gpt-5.1');
const timeout = envText('TIMEOUT', '180s');
const duration = envText('DURATION', '5m');

// Arrival rates (iterations per second) and the VU pool shared by both scenarios.
const pooledRPS = envNumber('POOLED_RPS', 12);
const oneToOneRPS = envNumber('ONE_TO_ONE_RPS', 12);
const preAllocatedVUs = envNumber('PRE_ALLOCATED_VUS', 50);
const maxVUs = envNumber('MAX_VUS', 400);

// Custom metrics, one set per mode. Each Trend is created with its second
// constructor argument set to true.
const pooledDurationMs = new Trend('openai_ws_pooled_duration_ms', true);
const oneToOneDurationMs = new Trend('openai_ws_one_to_one_duration_ms', true);
const pooledTTFTMs = new Trend('openai_ws_pooled_ttft_ms', true);
const oneToOneTTFTMs = new Trend('openai_ws_one_to_one_ttft_ms', true);
const pooledNon2xxRate = new Rate('openai_ws_pooled_non2xx_rate');
const oneToOneNon2xxRate = new Rate('openai_ws_one_to_one_non2xx_rate');
/**
 * k6 options: one constant-arrival-rate scenario per mode (pooled and
 * one-to-one) and the thresholds on each mode's custom metrics. The
 * one-to-one scenario starts 5 seconds after the pooled one.
 */
export const options = (() => {
  // Both scenarios share everything except exec, rate and the `mode` tag.
  const modeScenario = (exec, rate, mode) => ({
    executor: 'constant-arrival-rate',
    exec,
    rate,
    timeUnit: '1s',
    duration,
    preAllocatedVUs,
    maxVUs,
    tags: { mode },
  });

  return {
    scenarios: {
      pooled_mode: modeScenario('runPooledMode', pooledRPS, 'pooled'),
      one_to_one_mode: Object.assign(
        modeScenario('runOneToOneMode', oneToOneRPS, 'one_to_one'),
        { startTime: '5s' },
      ),
    },
    thresholds: {
      openai_ws_pooled_non2xx_rate: ['rate<0.02'],
      openai_ws_one_to_one_non2xx_rate: ['rate<0.02'],
      openai_ws_pooled_duration_ms: ['p(95)<3000', 'p(99)<6000'],
      openai_ws_one_to_one_duration_ms: ['p(95)<6000', 'p(99)<10000'],
    },
  };
})();
/**
 * Builds the request headers, adding a Bearer Authorization header only when
 * the module-level `wsAPIKey` is non-empty.
 *
 * @returns {Object} Header map.
 */
function buildHeaders() {
  const fixedHeaders = {
    'Content-Type': 'application/json',
    'User-Agent': 'codex_cli_rs/0.98.0',
  };
  return wsAPIKey
    ? Object.assign(fixedHeaders, { Authorization: `Bearer ${wsAPIKey}` })
    : fixedHeaders;
}
/**
 * Serializes the fixed request payload: a single user message, non-streaming,
 * with max_output_tokens of 48.
 *
 * @returns {string} JSON-encoded request body.
 */
function buildBody() {
  const userMessage = {
    role: 'user',
    content: [{ type: 'input_text', text: '请回复: pong' }],
  };
  const payload = {
    model,
    stream: false,
    input: [userMessage],
    max_output_tokens: 48,
  };
  return JSON.stringify(payload);
}
/**
 * POSTs the fixed body to `${baseURL}/v1/responses` and runs a 2xx check.
 *
 * @param {string} baseURL - Target base URL; when falsy no request is made.
 * @param {string} mode - Value of the `mode` tag attached to the request.
 * @returns {Object|null} The k6 response, or null when `baseURL` is falsy.
 */
function send(baseURL, mode) {
  if (!baseURL) {
    return null;
  }
  const endpoint = `${baseURL}/v1/responses`;
  const payload = buildBody();
  const requestParams = { headers: buildHeaders(), timeout, tags: { mode } };
  const res = http.post(endpoint, payload, requestParams);
  const statusIs2xx = (r) => r.status >= 200 && r.status < 300;
  check(res, { 'status is 2xx': statusIs2xx });
  return res;
}
/**
 * Scenario entry point for pooled mode: sends one request to the pooled base
 * URL and records total duration, `timings.waiting` (into the TTFT trend) and
 * the non-2xx flag. Does nothing when no request was sent.
 */
export function runPooledMode() {
  const res = send(pooledBaseURL, 'pooled');
  if (res) {
    const metricTags = { mode: 'pooled' };
    const isNon2xx = res.status < 200 || res.status >= 300;
    pooledDurationMs.add(res.timings.duration, metricTags);
    pooledTTFTMs.add(res.timings.waiting, metricTags);
    pooledNon2xxRate.add(isNon2xx, metricTags);
  }
}
/**
 * Scenario entry point for one-to-one mode: sends one request to the
 * one-to-one base URL and records total duration, `timings.waiting` (into the
 * TTFT trend) and the non-2xx flag. When the one-to-one base URL is empty the
 * iteration is a no-op and no samples are recorded.
 */
export function runOneToOneMode() {
  const res = oneToOneBaseURL ? send(oneToOneBaseURL, 'one_to_one') : null;
  if (res) {
    const metricTags = { mode: 'one_to_one' };
    const isNon2xx = res.status < 200 || res.status >= 300;
    oneToOneDurationMs.add(res.timings.duration, metricTags);
    oneToOneTTFTMs.add(res.timings.waiting, metricTags);
    oneToOneNon2xxRate.add(isNon2xx, metricTags);
  }
}
/**
 * k6 end-of-test hook: prints the metrics as pretty JSON on stdout and maps a
 * summary file path to the full serialized `data`.
 *
 * @param {Object} data - Summary object provided by k6.
 * @returns {Object} Map of output target to content.
 */
export function handleSummary(data) {
  const metricsJSON = JSON.stringify(data.metrics, null, 2);
  const outputs = {};
  outputs.stdout = `\nOpenAI WS 池化 vs 1:1 对比压测完成\n${metricsJSON}\n`;
  outputs['docs/perf/openai-ws-pooling-compare-summary.json'] = JSON.stringify(data, null, 2);
  return outputs;
}

View File

@@ -0,0 +1,216 @@
import http from 'k6/http';
import { check, sleep } from 'k6';
import { Rate, Trend } from 'k6/metrics';
// Environment lookups used by every setting below: an unset or empty variable
// yields the supplied fallback.
const envText = (name, fallback) => __ENV[name] || fallback;
const envNumber = (name, fallback) => Number(__ENV[name] || fallback);

// Target endpoint (one trailing slash removed) and credentials. The hotspot
// key falls back to the regular WS key when its own variable is unset or empty.
const baseURL = envText('BASE_URL', 'http://127.0.0.1:5231').replace(/\/$/, '');
const wsAPIKey = envText('WS_API_KEY', '').trim();
const wsHotspotAPIKey = envText('WS_HOTSPOT_API_KEY', wsAPIKey).trim();

// Request model, scenario duration and per-request timeout.
const model = envText('MODEL', 'gpt-5.3-codex');
const duration = envText('DURATION', '5m');
const timeout = envText('TIMEOUT', '180s');

// Arrival rates (iterations per second), one per scenario.
const shortRPS = envNumber('SHORT_RPS', 12);
const longRPS = envNumber('LONG_RPS', 4);
const errorRPS = envNumber('ERROR_RPS', 2);
const hotspotRPS = envNumber('HOTSPOT_RPS', 10);

// Default VU pool (used as-is by the short-request scenario).
const preAllocatedVUs = envNumber('PRE_ALLOCATED_VUS', 50);
const maxVUs = envNumber('MAX_VUS', 400);

// Custom metrics shared by all scenarios. Each Trend is created with its
// second constructor argument set to true.
const reqDurationMs = new Trend('openai_ws_v2_perf_req_duration_ms', true);
const ttftMs = new Trend('openai_ws_v2_perf_ttft_ms', true);
const non2xxRate = new Rate('openai_ws_v2_perf_non2xx_rate');
const doneRate = new Rate('openai_ws_v2_perf_done_rate');
const expectedErrorRate = new Rate('openai_ws_v2_perf_expected_error_rate');
/**
 * k6 options for the WSv2 performance suite: four constant-arrival-rate
 * scenarios (short, long, error injection, hotspot account) and the pass/fail
 * thresholds.
 *
 * The thresholds are scoped to the `slo_scope:success_path` sub-metrics, i.e.
 * to the three scenarios whose requests are expected to succeed. The
 * error_injection scenario deliberately sends a non-existent
 * previous_response_id and treats `status >= 400` as the expected outcome, but
 * its samples still flow into the shared non-2xx, done, duration and TTFT
 * metrics. At the default rates it is 2 of 28 requests per second (about 7%),
 * so unscoped `rate<0.05` / `rate>0.95` thresholds fail whenever the injection
 * works as intended. Error-injection samples remain visible in the unscoped
 * metrics and in `openai_ws_v2_perf_expected_error_rate`.
 */
export const options = {
  scenarios: {
    short_request: {
      executor: 'constant-arrival-rate',
      exec: 'runShortRequest',
      rate: shortRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs,
      maxVUs,
      tags: { scenario: 'short_request', slo_scope: 'success_path' },
    },
    long_request: {
      executor: 'constant-arrival-rate',
      exec: 'runLongRequest',
      rate: longRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs: Math.max(20, Math.ceil(longRPS * 6)),
      maxVUs: Math.max(100, Math.ceil(longRPS * 20)),
      tags: { scenario: 'long_request', slo_scope: 'success_path' },
    },
    error_injection: {
      executor: 'constant-arrival-rate',
      exec: 'runErrorInjection',
      rate: errorRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs: Math.max(8, Math.ceil(errorRPS * 4)),
      maxVUs: Math.max(40, Math.ceil(errorRPS * 12)),
      // Intentionally not tagged with slo_scope: failures here are expected.
      tags: { scenario: 'error_injection' },
    },
    hotspot_account: {
      executor: 'constant-arrival-rate',
      exec: 'runHotspotAccount',
      rate: hotspotRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs: Math.max(16, Math.ceil(hotspotRPS * 3)),
      maxVUs: Math.max(80, Math.ceil(hotspotRPS * 10)),
      tags: { scenario: 'hotspot_account', slo_scope: 'success_path' },
    },
  },
  thresholds: {
    'openai_ws_v2_perf_non2xx_rate{slo_scope:success_path}': ['rate<0.05'],
    'openai_ws_v2_perf_req_duration_ms{slo_scope:success_path}': ['p(95)<5000', 'p(99)<9000'],
    'openai_ws_v2_perf_ttft_ms{slo_scope:success_path}': ['p(99)<2000'],
    'openai_ws_v2_perf_done_rate{slo_scope:success_path}': ['rate>0.95'],
  },
};
/**
 * Builds the request headers.
 *
 * @param {string} apiKey - Bearer token; when falsy no Authorization header is set.
 * @param {Object} [opts] - Optional extras.
 * @param {string} [opts.sessionID] - Sent as the `session_id` header when truthy.
 * @param {string} [opts.conversationID] - Sent as the `conversation_id` header when truthy.
 * @returns {Object} Header map.
 */
function buildHeaders(apiKey, opts = {}) {
  const headers = {
    'Content-Type': 'application/json',
    'User-Agent': 'codex_cli_rs/0.104.0',
    'OpenAI-Beta': 'responses_websockets=2026-02-06,responses=experimental',
  };
  // Optional headers in the order they are appended; falsy values are skipped.
  const optionalHeaders = [
    ['Authorization', apiKey ? `Bearer ${apiKey}` : ''],
    ['session_id', opts.sessionID],
    ['conversation_id', opts.conversationID],
  ];
  for (const [name, value] of optionalHeaders) {
    if (value) {
      headers[name] = value;
    }
  }
  return headers;
}
/**
 * Serializes the short-request payload: a single user message, non-streaming,
 * with max_output_tokens of 64.
 *
 * @returns {string} JSON-encoded request body.
 */
function shortBody() {
  const userMessage = {
    role: 'user',
    content: [{ type: 'input_text', text: '请回复一个词pong' }],
  };
  const payload = {
    model,
    stream: false,
    input: [userMessage],
    max_output_tokens: 64,
  };
  return JSON.stringify(payload);
}
/**
 * Serializes the long-request payload: 20 user messages, 28 function-tool
 * definitions sharing the same parameter schema, parallel tool calls enabled,
 * medium reasoning effort, and max_output_tokens of 256.
 *
 * @returns {string} JSON-encoded request body.
 */
function longBody() {
  const TOOL_COUNT = 28;
  const MESSAGE_COUNT = 20;

  const tools = Array.from({ length: TOOL_COUNT }, (_, idx) => ({
    type: 'function',
    name: `perf_tool_${idx}`,
    description: 'load test tool schema',
    parameters: {
      type: 'object',
      properties: {
        query: { type: 'string' },
        limit: { type: 'number' },
        with_cache: { type: 'boolean' },
      },
      required: ['query'],
    },
  }));

  const input = Array.from({ length: MESSAGE_COUNT }, (_, idx) => ({
    role: 'user',
    content: [{ type: 'input_text', text: `长请求压测消息 ${idx}: 请输出简要摘要。` }],
  }));

  const payload = {
    model,
    stream: false,
    input,
    tools,
    parallel_tool_calls: true,
    max_output_tokens: 256,
    reasoning: { effort: 'medium' },
    instructions: '你是压测助手,简洁回复。',
  };
  return JSON.stringify(payload);
}
/**
 * Serializes the error-injection payload: a single user message chained to a
 * `previous_response_id` synthesized from the current VU and iteration numbers.
 *
 * @returns {string} JSON-encoded request body.
 */
function errorInjectionBody() {
  const syntheticResponseID = `resp_not_found_${__VU}_${__ITER}`;
  const userMessage = {
    role: 'user',
    content: [{ type: 'input_text', text: '触发错误注入路径。' }],
  };
  const payload = {
    model,
    stream: false,
    previous_response_id: syntheticResponseID,
    input: [userMessage],
  };
  return JSON.stringify(payload);
}
/**
 * POSTs `body` to `${baseURL}/v1/responses` and records, under the given tags,
 * the total duration, `timings.waiting` (into the TTFT trend) and whether the
 * status was outside 2xx.
 *
 * @param {string} apiKey - Passed to buildHeaders for the Authorization header.
 * @param {string} body - Serialized request payload.
 * @param {Object} tags - Tags for both the HTTP request and the metric samples.
 * @param {Object} [opts] - Extra header options forwarded to buildHeaders.
 * @returns {Object} The k6 response object, regardless of status.
 */
function postResponses(apiKey, body, tags, opts = {}) {
  const requestParams = { headers: buildHeaders(apiKey, opts), timeout, tags };
  const res = http.post(`${baseURL}/v1/responses`, body, requestParams);

  const { duration: totalMs, waiting: waitingMs } = res.timings;
  const isNon2xx = res.status < 200 || res.status >= 300;
  reqDurationMs.add(totalMs, tags);
  ttftMs.add(waitingMs, tags);
  non2xxRate.add(isNon2xx, tags);
  return res;
}
/**
 * Reports whether the response body contains the literal marker `[DONE]`.
 *
 * @param {Object} res - Response-like object with a `body` property.
 * @returns {boolean} False when `res` or its body is missing or empty.
 */
function hasDone(res) {
  if (!res || !res.body) {
    return false;
  }
  return res.body.indexOf('[DONE]') !== -1;
}
/**
 * Scenario entry point: sends the short payload, checks for a 2xx status, and
 * counts the request as done when the body has the `[DONE]` marker or the
 * status is 2xx.
 */
export function runShortRequest() {
  const tags = { scenario: 'short_request' };
  const res = postResponses(wsAPIKey, shortBody(), tags);
  const statusIs2xx = (r) => r.status >= 200 && r.status < 300;
  check(res, { 'short status is 2xx': statusIs2xx });
  const completed = hasDone(res) || statusIs2xx(res);
  doneRate.add(completed, tags);
}
/**
 * Scenario entry point: sends the long payload, checks for a 2xx status, and
 * counts the request as done when the body has the `[DONE]` marker or the
 * status is 2xx.
 */
export function runLongRequest() {
  const tags = { scenario: 'long_request' };
  const res = postResponses(wsAPIKey, longBody(), tags);
  const statusIs2xx = (r) => r.status >= 200 && r.status < 300;
  check(res, { 'long status is 2xx': statusIs2xx });
  const completed = hasDone(res) || statusIs2xx(res);
  doneRate.add(completed, tags);
}
/**
 * Scenario entry point: sends the error-injection payload and records whether
 * the status was >= 400 (the expected outcome) plus whether the body carried
 * the `[DONE]` marker. Unlike the other scenarios, no 2xx check is run and a
 * 2xx status alone does not count as done.
 */
export function runErrorInjection() {
  const tags = { scenario: 'error_injection' };
  const res = postResponses(wsAPIKey, errorInjectionBody(), tags);
  // 4xx/5xx responses are acceptable here; the point is to observe the
  // fallback behaviour and jitter on the error path.
  const gotErrorStatus = res.status >= 400;
  expectedErrorRate.add(gotErrorStatus, tags);
  const sawDoneMarker = hasDone(res);
  doneRate.add(sawDoneMarker, tags);
}
/**
 * Scenario entry point: sends the short payload with the hotspot API key and
 * fixed session/conversation ids, so every iteration carries the same
 * `session_id` and `conversation_id` headers. Checks for a 2xx status, records
 * the done flag, then sleeps for 0.01 s.
 */
export function runHotspotAccount() {
  const tags = { scenario: 'hotspot_account' };
  const fixedIdentity = {
    sessionID: 'perf-hotspot-session-fixed',
    conversationID: 'perf-hotspot-conversation-fixed',
  };
  const res = postResponses(wsHotspotAPIKey, shortBody(), tags, fixedIdentity);
  const statusIs2xx = (r) => r.status >= 200 && r.status < 300;
  check(res, { 'hotspot status is 2xx': statusIs2xx });
  const completed = hasDone(res) || statusIs2xx(res);
  doneRate.add(completed, tags);
  sleep(0.01);
}
/**
 * k6 end-of-test hook: prints the metrics as pretty JSON on stdout and maps a
 * summary file path to the full serialized `data`.
 *
 * @param {Object} data - Summary object provided by k6.
 * @returns {Object} Map of output target to content.
 */
export function handleSummary(data) {
  const metricsJSON = JSON.stringify(data.metrics, null, 2);
  const outputs = {};
  outputs.stdout = `\nOpenAI WSv2 性能套件压测完成\n${metricsJSON}\n`;
  outputs['docs/perf/openai-ws-v2-perf-suite-summary.json'] = JSON.stringify(data, null, 2);
  return outputs;
}