feat(sync): full code sync from release
This commit is contained in:
123
tools/perf/openai_ws_pooling_compare_k6.js
Normal file
123
tools/perf/openai_ws_pooling_compare_k6.js
Normal file
@@ -0,0 +1,123 @@
|
||||
import http from 'k6/http';
|
||||
import { check } from 'k6';
|
||||
import { Rate, Trend } from 'k6/metrics';
|
||||
|
||||
const pooledBaseURL = (__ENV.POOLED_BASE_URL || 'http://127.0.0.1:5231').replace(/\/$/, '');
|
||||
const oneToOneBaseURL = (__ENV.ONE_TO_ONE_BASE_URL || '').replace(/\/$/, '');
|
||||
const wsAPIKey = (__ENV.WS_API_KEY || '').trim();
|
||||
const model = __ENV.MODEL || 'gpt-5.1';
|
||||
const timeout = __ENV.TIMEOUT || '180s';
|
||||
const duration = __ENV.DURATION || '5m';
|
||||
const pooledRPS = Number(__ENV.POOLED_RPS || 12);
|
||||
const oneToOneRPS = Number(__ENV.ONE_TO_ONE_RPS || 12);
|
||||
const preAllocatedVUs = Number(__ENV.PRE_ALLOCATED_VUS || 50);
|
||||
const maxVUs = Number(__ENV.MAX_VUS || 400);
|
||||
|
||||
const pooledDurationMs = new Trend('openai_ws_pooled_duration_ms', true);
|
||||
const oneToOneDurationMs = new Trend('openai_ws_one_to_one_duration_ms', true);
|
||||
const pooledTTFTMs = new Trend('openai_ws_pooled_ttft_ms', true);
|
||||
const oneToOneTTFTMs = new Trend('openai_ws_one_to_one_ttft_ms', true);
|
||||
const pooledNon2xxRate = new Rate('openai_ws_pooled_non2xx_rate');
|
||||
const oneToOneNon2xxRate = new Rate('openai_ws_one_to_one_non2xx_rate');
|
||||
|
||||
export const options = {
|
||||
scenarios: {
|
||||
pooled_mode: {
|
||||
executor: 'constant-arrival-rate',
|
||||
exec: 'runPooledMode',
|
||||
rate: pooledRPS,
|
||||
timeUnit: '1s',
|
||||
duration,
|
||||
preAllocatedVUs,
|
||||
maxVUs,
|
||||
tags: { mode: 'pooled' },
|
||||
},
|
||||
one_to_one_mode: {
|
||||
executor: 'constant-arrival-rate',
|
||||
exec: 'runOneToOneMode',
|
||||
rate: oneToOneRPS,
|
||||
timeUnit: '1s',
|
||||
duration,
|
||||
preAllocatedVUs,
|
||||
maxVUs,
|
||||
tags: { mode: 'one_to_one' },
|
||||
startTime: '5s',
|
||||
},
|
||||
},
|
||||
thresholds: {
|
||||
openai_ws_pooled_non2xx_rate: ['rate<0.02'],
|
||||
openai_ws_one_to_one_non2xx_rate: ['rate<0.02'],
|
||||
openai_ws_pooled_duration_ms: ['p(95)<3000', 'p(99)<6000'],
|
||||
openai_ws_one_to_one_duration_ms: ['p(95)<6000', 'p(99)<10000'],
|
||||
},
|
||||
};
|
||||
|
||||
function buildHeaders() {
|
||||
const headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'User-Agent': 'codex_cli_rs/0.98.0',
|
||||
};
|
||||
if (wsAPIKey) {
|
||||
headers.Authorization = `Bearer ${wsAPIKey}`;
|
||||
}
|
||||
return headers;
|
||||
}
|
||||
|
||||
function buildBody() {
|
||||
return JSON.stringify({
|
||||
model,
|
||||
stream: false,
|
||||
input: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [{ type: 'input_text', text: '请回复: pong' }],
|
||||
},
|
||||
],
|
||||
max_output_tokens: 48,
|
||||
});
|
||||
}
|
||||
|
||||
function send(baseURL, mode) {
|
||||
if (!baseURL) {
|
||||
return null;
|
||||
}
|
||||
const res = http.post(`${baseURL}/v1/responses`, buildBody(), {
|
||||
headers: buildHeaders(),
|
||||
timeout,
|
||||
tags: { mode },
|
||||
});
|
||||
check(res, {
|
||||
'status is 2xx': (r) => r.status >= 200 && r.status < 300,
|
||||
});
|
||||
return res;
|
||||
}
|
||||
|
||||
export function runPooledMode() {
|
||||
const res = send(pooledBaseURL, 'pooled');
|
||||
if (!res) {
|
||||
return;
|
||||
}
|
||||
pooledDurationMs.add(res.timings.duration, { mode: 'pooled' });
|
||||
pooledTTFTMs.add(res.timings.waiting, { mode: 'pooled' });
|
||||
pooledNon2xxRate.add(res.status < 200 || res.status >= 300, { mode: 'pooled' });
|
||||
}
|
||||
|
||||
export function runOneToOneMode() {
|
||||
if (!oneToOneBaseURL) {
|
||||
return;
|
||||
}
|
||||
const res = send(oneToOneBaseURL, 'one_to_one');
|
||||
if (!res) {
|
||||
return;
|
||||
}
|
||||
oneToOneDurationMs.add(res.timings.duration, { mode: 'one_to_one' });
|
||||
oneToOneTTFTMs.add(res.timings.waiting, { mode: 'one_to_one' });
|
||||
oneToOneNon2xxRate.add(res.status < 200 || res.status >= 300, { mode: 'one_to_one' });
|
||||
}
|
||||
|
||||
export function handleSummary(data) {
|
||||
return {
|
||||
stdout: `\nOpenAI WS 池化 vs 1:1 对比压测完成\n${JSON.stringify(data.metrics, null, 2)}\n`,
|
||||
'docs/perf/openai-ws-pooling-compare-summary.json': JSON.stringify(data, null, 2),
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user