Files
xinghuoapi/tools/perf/openai_ws_v2_perf_suite_k6.js
2026-02-28 15:01:20 +08:00

217 lines
6.4 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import http from 'k6/http';
import { check, sleep } from 'k6';
import { Rate, Trend } from 'k6/metrics';
// ---------------------------------------------------------------------------
// Run configuration. Every value can be overridden through k6's __ENV map;
// the literals below are the defaults used when a variable is unset or empty.
// ---------------------------------------------------------------------------

/** Returns the raw value of environment variable `key`, or `fallback` when it is unset/empty. */
const envOr = (key, fallback) => __ENV[key] || fallback;

/** Same lookup as envOr, with the result passed through Number(). */
const envNumber = (key, fallback) => Number(envOr(key, fallback));

// Target server; one trailing slash is stripped so request paths can start with '/'.
const baseURL = envOr('BASE_URL', 'http://127.0.0.1:5231').replace(/\/$/, '');

// API keys. The hotspot scenario falls back to the regular key when it has no key of its own.
const wsAPIKey = envOr('WS_API_KEY', '').trim();
const wsHotspotAPIKey = envOr('WS_HOTSPOT_API_KEY', wsAPIKey).trim();

// Model name placed in every request body.
const model = envOr('MODEL', 'gpt-5.3-codex');

// Scenario length and per-request timeout, handed to k6 as duration strings.
const duration = envOr('DURATION', '5m');
const timeout = envOr('TIMEOUT', '180s');

// Arrival rate (iterations per second) of each scenario.
const shortRPS = envNumber('SHORT_RPS', 12);
const longRPS = envNumber('LONG_RPS', 4);
const errorRPS = envNumber('ERROR_RPS', 2);
const hotspotRPS = envNumber('HOTSPOT_RPS', 10);

// VU pool sizing used by the short_request scenario.
const preAllocatedVUs = envNumber('PRE_ALLOCATED_VUS', 50);
const maxVUs = envNumber('MAX_VUS', 400);

// Custom metrics. The second Trend argument (`true`) is k6's isTime flag —
// presumably so the samples are reported as durations.
const reqDurationMs = new Trend('openai_ws_v2_perf_req_duration_ms', true);
const ttftMs = new Trend('openai_ws_v2_perf_ttft_ms', true);
const non2xxRate = new Rate('openai_ws_v2_perf_non2xx_rate');
const doneRate = new Rate('openai_ws_v2_perf_done_rate');
const expectedErrorRate = new Rate('openai_ws_v2_perf_expected_error_rate');
/**
 * Builds one 'constant-arrival-rate' scenario definition.
 *
 * @param {string} name - Scenario name, also written into the `scenario` tag.
 * @param {string} exec - Name of the exported function the scenario runs.
 * @param {number} rate - Iterations started per second.
 * @param {number} preAllocated - Number of VUs allocated up front.
 * @param {number} ceiling - Maximum number of VUs the scenario may use.
 * @returns {object} Scenario configuration object.
 */
function arrivalRateScenario(name, exec, rate, preAllocated, ceiling) {
  return {
    executor: 'constant-arrival-rate',
    exec,
    rate,
    timeUnit: '1s',
    duration,
    preAllocatedVUs: preAllocated,
    maxVUs: ceiling,
    tags: { scenario: name },
  };
}

/**
 * k6 options: four constant-arrival-rate scenarios that run for the same
 * duration, plus pass/fail thresholds on the suite's custom metrics.
 */
export const options = {
  scenarios: {
    // Uses the globally configured VU pool sizes.
    short_request: arrivalRateScenario(
      'short_request',
      'runShortRequest',
      shortRPS,
      preAllocatedVUs,
      maxVUs,
    ),
    // The remaining scenarios size their VU pools from their own rate, with a fixed floor.
    long_request: arrivalRateScenario(
      'long_request',
      'runLongRequest',
      longRPS,
      Math.max(20, Math.ceil(longRPS * 6)),
      Math.max(100, Math.ceil(longRPS * 20)),
    ),
    error_injection: arrivalRateScenario(
      'error_injection',
      'runErrorInjection',
      errorRPS,
      Math.max(8, Math.ceil(errorRPS * 4)),
      Math.max(40, Math.ceil(errorRPS * 12)),
    ),
    hotspot_account: arrivalRateScenario(
      'hotspot_account',
      'runHotspotAccount',
      hotspotRPS,
      Math.max(16, Math.ceil(hotspotRPS * 3)),
      Math.max(80, Math.ceil(hotspotRPS * 10)),
    ),
  },
  // NOTE(review): these thresholds are not filtered by scenario, so requests from
  // error_injection (which are expected to fail) count against them too. With the
  // default rates that scenario is 2 of 28 req/s (~7%), which by itself can exceed
  // the 5% non-2xx budget — confirm this is intended.
  thresholds: {
    openai_ws_v2_perf_non2xx_rate: ['rate<0.05'],
    openai_ws_v2_perf_req_duration_ms: ['p(95)<5000', 'p(99)<9000'],
    openai_ws_v2_perf_ttft_ms: ['p(99)<2000'],
    openai_ws_v2_perf_done_rate: ['rate>0.95'],
  },
};
/**
 * Builds the HTTP request headers for a call to the responses endpoint.
 *
 * @param {string} apiKey - API key; when truthy it is sent as a Bearer token.
 * @param {object} [opts] - Optional per-request identifiers.
 * @param {string} [opts.sessionID] - Sent as the `session_id` header when truthy.
 * @param {string} [opts.conversationID] - Sent as the `conversation_id` header when truthy.
 * @returns {object} Header name -> value map.
 */
function buildHeaders(apiKey, opts = {}) {
  const result = {
    'Content-Type': 'application/json',
    'User-Agent': 'codex_cli_rs/0.104.0',
    'OpenAI-Beta': 'responses_websockets=2026-02-06,responses=experimental',
  };

  // Optional headers, in the order they are appended; falsy values are skipped.
  const optionalHeaders = [
    ['Authorization', apiKey && `Bearer ${apiKey}`],
    ['session_id', opts.sessionID],
    ['conversation_id', opts.conversationID],
  ];
  for (const [headerName, headerValue] of optionalHeaders) {
    if (headerValue) {
      result[headerName] = headerValue;
    }
  }

  return result;
}
/**
 * Serializes the request body for the short scenarios: a single user message,
 * non-streaming, capped at 64 output tokens.
 *
 * @returns {string} JSON request body.
 */
function shortBody() {
  const userMessage = {
    role: 'user',
    content: [{ type: 'input_text', text: '请回复一个词pong' }],
  };
  const payload = {
    model,
    stream: false,
    input: [userMessage],
    max_output_tokens: 64,
  };
  return JSON.stringify(payload);
}
/**
 * Serializes the request body for the long scenario: 20 user messages and
 * 28 function-tool definitions, non-streaming, capped at 256 output tokens.
 *
 * @returns {string} JSON request body.
 */
function longBody() {
  const TOOL_COUNT = 28;
  const MESSAGE_COUNT = 20;

  // Every tool shares the same schema shape; only the name carries the index.
  const makeTool = (_, index) => ({
    type: 'function',
    name: `perf_tool_${index}`,
    description: 'load test tool schema',
    parameters: {
      type: 'object',
      properties: {
        query: { type: 'string' },
        limit: { type: 'number' },
        with_cache: { type: 'boolean' },
      },
      required: ['query'],
    },
  });

  // One user turn per index, with the index embedded in the text.
  const makeMessage = (_, index) => ({
    role: 'user',
    content: [{ type: 'input_text', text: `长请求压测消息 ${index}: 请输出简要摘要。` }],
  });

  const tools = Array.from({ length: TOOL_COUNT }, makeTool);
  const input = Array.from({ length: MESSAGE_COUNT }, makeMessage);

  return JSON.stringify({
    model,
    stream: false,
    input,
    tools,
    parallel_tool_calls: true,
    max_output_tokens: 256,
    reasoning: { effort: 'medium' },
    instructions: '你是压测助手,简洁回复。',
  });
}
/**
 * Serializes the request body for the error-injection scenario. It references a
 * `previous_response_id` derived from the current VU and iteration numbers,
 * named so that it is not expected to exist on the server.
 *
 * @returns {string} JSON request body.
 */
function errorInjectionBody() {
  const missingResponseID = `resp_not_found_${__VU}_${__ITER}`;
  const userMessage = {
    role: 'user',
    content: [{ type: 'input_text', text: '触发错误注入路径。' }],
  };
  const payload = {
    model,
    stream: false,
    previous_response_id: missingResponseID,
    input: [userMessage],
  };
  return JSON.stringify(payload);
}
/**
 * POSTs `body` to `${baseURL}/v1/responses` and records the shared metrics.
 *
 * @param {string} apiKey - API key forwarded to buildHeaders.
 * @param {string} body - Serialized request body.
 * @param {object} tags - Tags attached to the request and to each metric sample.
 * @param {object} [opts] - Optional header options forwarded to buildHeaders.
 * @returns {object} The response object returned by http.post.
 */
function postResponses(apiKey, body, tags, opts = {}) {
  const endpoint = `${baseURL}/v1/responses`;
  const params = {
    headers: buildHeaders(apiKey, opts),
    timeout,
    tags,
  };
  const response = http.post(endpoint, body, params);

  // Total request time, and the `waiting` timing which this suite records as TTFT.
  const { duration: totalMs, waiting: waitingMs } = response.timings;
  reqDurationMs.add(totalMs, tags);
  ttftMs.add(waitingMs, tags);

  // Anything outside 200-299 counts as non-2xx.
  const isNon2xx = response.status < 200 || response.status >= 300;
  non2xxRate.add(isNon2xx, tags);

  return response;
}
/**
 * Reports whether a response body contains the literal marker `[DONE]`
 * (presumably the end-of-stream sentinel of a streamed reply).
 *
 * Only string bodies are inspected. A missing response, a null/empty body, or a
 * non-string body (for example an ArrayBuffer from a binary response) yields
 * `false` rather than throwing.
 *
 * @param {object|null|undefined} res - Response object whose `body` is checked.
 * @returns {boolean} True when `res.body` is a string containing `[DONE]`.
 */
function hasDone(res) {
  const body = res ? res.body : null;
  return typeof body === 'string' && body.includes('[DONE]');
}
/**
 * Scenario entry point: sends one short request with the regular API key,
 * checks for a 2xx status, and records the request as done when the body has
 * the done marker or the status is 2xx.
 */
export function runShortRequest() {
  const scenarioTags = { scenario: 'short_request' };
  const isSuccess = (status) => status >= 200 && status < 300;

  const response = postResponses(wsAPIKey, shortBody(), scenarioTags);
  check(response, { 'short status is 2xx': (r) => isSuccess(r.status) });

  const finished = hasDone(response) || isSuccess(response.status);
  doneRate.add(finished, scenarioTags);
}
/**
 * Scenario entry point: sends one long request with the regular API key,
 * checks for a 2xx status, and records the request as done when the body has
 * the done marker or the status is 2xx.
 */
export function runLongRequest() {
  const scenarioTags = { scenario: 'long_request' };
  const isSuccess = (status) => status >= 200 && status < 300;

  const response = postResponses(wsAPIKey, longBody(), scenarioTags);
  check(response, { 'long status is 2xx': (r) => isSuccess(r.status) });

  const finished = hasDone(response) || isSuccess(response.status);
  doneRate.add(finished, scenarioTags);
}
/**
 * Scenario entry point: sends a request that references a non-existent
 * previous response, to exercise the server's error/fallback path.
 *
 * Metrics recorded:
 * - expected_error_rate: true when the server answered with status >= 400.
 * - done_rate: true when the body has the done marker or the server answered
 *   at all (status > 0; k6 reports status 0 when no HTTP response was received).
 */
export function runErrorInjection() {
  const tags = { scenario: 'error_injection' };
  const res = postResponses(wsAPIKey, errorInjectionBody(), tags);

  // The error-injection scenario allows 4xx/5xx; the focus is on observing the
  // fallback behaviour and jitter on the error path.
  expectedErrorRate.add(res.status >= 400, tags);

  // An error reply is a completed request for this scenario. Recording only the
  // `[DONE]` marker here marked every sample as not done (requests are sent with
  // `stream: false`), which pulled the shared done-rate metric down by this
  // scenario's whole share of traffic.
  doneRate.add(hasDone(res) || res.status > 0, tags);
}
/**
 * Scenario entry point: sends a short request with the hotspot API key and a
 * fixed session/conversation id on every iteration, checks for a 2xx status,
 * records the done metric, then sleeps for 0.01 s.
 */
export function runHotspotAccount() {
  const scenarioTags = { scenario: 'hotspot_account' };
  // The same identifiers on every call, so all traffic shares one session/conversation.
  const fixedIdentity = {
    sessionID: 'perf-hotspot-session-fixed',
    conversationID: 'perf-hotspot-conversation-fixed',
  };
  const isSuccess = (status) => status >= 200 && status < 300;

  const response = postResponses(wsHotspotAPIKey, shortBody(), scenarioTags, fixedIdentity);
  check(response, { 'hotspot status is 2xx': (r) => isSuccess(r.status) });

  const finished = hasDone(response) || isSuccess(response.status);
  doneRate.add(finished, scenarioTags);

  sleep(0.01);
}
/**
 * k6 end-of-test hook. Returns a map of output targets to content: the metrics
 * as pretty-printed JSON for stdout, and the complete summary data as
 * pretty-printed JSON for a file under docs/perf/.
 *
 * @param {object} data - Summary data passed in by k6; `data.metrics` is read.
 * @returns {object} Map of output target -> content string.
 */
export function handleSummary(data) {
  const metricsJSON = JSON.stringify(data.metrics, null, 2);
  const fullSummaryJSON = JSON.stringify(data, null, 2);

  const outputs = {};
  outputs.stdout = `\nOpenAI WSv2 性能套件压测完成\n${metricsJSON}\n`;
  outputs['docs/perf/openai-ws-v2-perf-suite-summary.json'] = fullSummaryJSON;
  return outputs;
}