feat(sync): full code sync from release

This commit is contained in:
yangjianbo
2026-02-28 15:01:20 +08:00
parent bfc7b339f7
commit bb664d9bbf
338 changed files with 54513 additions and 2011 deletions

View File

@@ -0,0 +1,167 @@
import http from 'k6/http';
import { check, sleep } from 'k6';
import { Rate, Trend } from 'k6/metrics';
// Target endpoint and credentials; separate API keys for the HTTP and WS paths.
const baseURL = (__ENV.BASE_URL || 'http://127.0.0.1:5231').replace(/\/$/, '');
const httpAPIKey = (__ENV.HTTP_API_KEY || '').trim();
const wsAPIKey = (__ENV.WS_API_KEY || '').trim();
// Model under test, total run duration, and per-request timeout.
const model = __ENV.MODEL || 'gpt-5.1';
const duration = __ENV.DURATION || '5m';
const timeout = __ENV.TIMEOUT || '180s';
// Arrival rates (requests/second) for the three scenarios, plus the number of
// chained rounds per iteration and the default VU pool sizing.
const httpRPS = Number(__ENV.HTTP_RPS || 10);
const wsRPS = Number(__ENV.WS_RPS || 10);
const chainRPS = Number(__ENV.CHAIN_RPS || 1);
const chainRounds = Number(__ENV.CHAIN_ROUNDS || 20);
const preAllocatedVUs = Number(__ENV.PRE_ALLOCATED_VUS || 40);
const maxVUs = Number(__ENV.MAX_VUS || 300);
// Custom metrics: latency trends (ms, time-enabled) and error/success rates.
const httpDurationMs = new Trend('openai_http_req_duration_ms', true);
const wsDurationMs = new Trend('openai_ws_req_duration_ms', true);
const wsChainDurationMs = new Trend('openai_ws_chain_round_duration_ms', true);
const wsChainTTFTMs = new Trend('openai_ws_chain_round_ttft_ms', true);
const httpNon2xxRate = new Rate('openai_http_non2xx_rate');
const wsNon2xxRate = new Rate('openai_ws_non2xx_rate');
const wsChainRoundSuccessRate = new Rate('openai_ws_chain_round_success_rate');
// k6 run configuration: three concurrent constant-arrival-rate scenarios.
export const options = {
  scenarios: {
    // Plain HTTP transport baseline at HTTP_RPS requests/second.
    http_baseline: {
      executor: 'constant-arrival-rate',
      exec: 'runHTTPBaseline',
      rate: httpRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs,
      maxVUs,
      tags: { path: 'http_baseline' },
    },
    // WS-backed transport baseline at WS_RPS requests/second.
    ws_baseline: {
      executor: 'constant-arrival-rate',
      exec: 'runWSBaseline',
      rate: wsRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs,
      maxVUs,
      tags: { path: 'ws_baseline' },
    },
    // Low-rate chained-conversation scenario. VU pools are derived from
    // CHAIN_RPS because each iteration issues chainRounds sequential requests.
    ws_chain_20_rounds: {
      executor: 'constant-arrival-rate',
      exec: 'runWSChain20Rounds',
      rate: chainRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs: Math.max(2, Math.ceil(chainRPS * 2)),
      maxVUs: Math.max(20, Math.ceil(chainRPS * 10)),
      tags: { path: 'ws_chain_20_rounds' },
    },
  },
  // Pass/fail gates: <2% non-2xx on both transports, latency percentile caps,
  // >98% chained-round success, and a p99 TTFT cap for chained rounds.
  thresholds: {
    openai_http_non2xx_rate: ['rate<0.02'],
    openai_ws_non2xx_rate: ['rate<0.02'],
    openai_http_req_duration_ms: ['p(95)<4000', 'p(99)<7000'],
    openai_ws_req_duration_ms: ['p(95)<3000', 'p(99)<6000'],
    openai_ws_chain_round_success_rate: ['rate>0.98'],
    openai_ws_chain_round_ttft_ms: ['p(99)<1200'],
  },
};
// Builds the request headers for a /v1/responses call.
// A Bearer Authorization header is attached only when `apiKey` is non-empty.
function buildHeaders(apiKey) {
  const base = {
    'Content-Type': 'application/json',
    'User-Agent': 'codex_cli_rs/0.98.0',
  };
  return apiKey ? { ...base, Authorization: `Bearer ${apiKey}` } : base;
}
// Serialises a minimal /v1/responses request body.
// When `previousResponseID` is non-empty, the request chains onto the prior
// response via `previous_response_id`.
function buildBody(previousResponseID) {
  const request = {
    model,
    stream: false,
    input: [
      {
        role: 'user',
        content: [{ type: 'input_text', text: '请回复一个单词: pong' }],
      },
    ],
    max_output_tokens: 64,
  };
  return JSON.stringify(
    previousResponseID ? { ...request, previous_response_id: previousResponseID } : request
  );
}
// POSTs `body` to /v1/responses with the given API key and metric tags,
// records a 2xx check, and returns the raw k6 response.
function postResponses(apiKey, body, tags) {
  const params = {
    headers: buildHeaders(apiKey),
    timeout,
    tags,
  };
  const res = http.post(`${baseURL}/v1/responses`, body, params);
  check(res, { 'status is 2xx': (r) => r.status >= 200 && r.status < 300 });
  return res;
}
// Extracts the `id` field from a JSON response body.
// Returns '' when the response/body is missing, the body is not valid JSON,
// or the parsed payload has no string `id`.
function parseResponseID(res) {
  if (!(res && res.body)) {
    return '';
  }
  let payload;
  try {
    payload = JSON.parse(res.body);
  } catch (_) {
    return '';
  }
  return payload && typeof payload.id === 'string' ? payload.id.trim() : '';
}
// HTTP-transport baseline: one unchained request per iteration, recording
// duration and non-2xx rate under the 'http' transport tag.
export function runHTTPBaseline() {
  const res = postResponses(httpAPIKey, buildBody(''), { transport: 'http' });
  httpDurationMs.add(res.timings.duration, { transport: 'http' });
  httpNon2xxRate.add(res.status < 200 || res.status >= 300, { transport: 'http' });
}
// WS-transport baseline: identical request shape to the HTTP baseline but
// sent with the WS API key and tagged 'ws_v2'.
export function runWSBaseline() {
  const res = postResponses(wsAPIKey, buildBody(''), { transport: 'ws_v2' });
  wsDurationMs.add(res.timings.duration, { transport: 'ws_v2' });
  wsNon2xxRate.add(res.status < 200 || res.status >= 300, { transport: 'ws_v2' });
}
// Dedicated 20+ round chaining scenario: each round feeds the previous
// response id back in, validating previous_response_id stability and latency
// over long chains.
export function runWSChain20Rounds() {
  let previousResponseID = '';
  for (let round = 1; round <= chainRounds; round += 1) {
    const roundStart = Date.now();
    const res = postResponses(wsAPIKey, buildBody(previousResponseID), { transport: 'ws_v2_chain' });
    const ok = res.status >= 200 && res.status < 300;
    wsChainRoundSuccessRate.add(ok, { round: `${round}` });
    // Round duration is wall-clock (includes local work); TTFT is approximated
    // by k6's timings.waiting (time to first byte).
    wsChainDurationMs.add(Date.now() - roundStart, { round: `${round}` });
    wsChainTTFTMs.add(res.timings.waiting, { round: `${round}` });
    wsNon2xxRate.add(!ok, { transport: 'ws_v2_chain' });
    if (!ok) {
      // Abort the chain on the first failed round.
      return;
    }
    const respID = parseResponseID(res);
    if (!respID) {
      // NOTE(review): this adds a second (false) success sample for a round
      // that already recorded `true` above — confirm the double count is the
      // intended way to surface 'missing_response_id'.
      wsChainRoundSuccessRate.add(false, { round: `${round}`, reason: 'missing_response_id' });
      return;
    }
    previousResponseID = respID;
    // Small pause between rounds to avoid a tight request loop.
    sleep(0.01);
  }
}
// End-of-run summary hook: prints pretty-printed metrics to stdout and writes
// the full summary as a JSON artifact under docs/perf/.
export function handleSummary(data) {
  const metricsJSON = JSON.stringify(data.metrics, null, 2);
  const fullJSON = JSON.stringify(data, null, 2);
  return {
    stdout: `\nOpenAI WSv2 对比压测完成\n${metricsJSON}\n`,
    'docs/perf/openai-ws-v2-compare-summary.json': fullJSON,
  };
}

View File

@@ -0,0 +1,123 @@
import http from 'k6/http';
import { check } from 'k6';
import { Rate, Trend } from 'k6/metrics';
// Two deployments under comparison: pooled-connection mode vs 1:1 mode.
// The 1:1 base URL is optional; its scenario no-ops when it is unset.
const pooledBaseURL = (__ENV.POOLED_BASE_URL || 'http://127.0.0.1:5231').replace(/\/$/, '');
const oneToOneBaseURL = (__ENV.ONE_TO_ONE_BASE_URL || '').replace(/\/$/, '');
const wsAPIKey = (__ENV.WS_API_KEY || '').trim();
// Model under test, per-request timeout, and total run duration.
const model = __ENV.MODEL || 'gpt-5.1';
const timeout = __ENV.TIMEOUT || '180s';
const duration = __ENV.DURATION || '5m';
// Arrival rates (requests/second) and shared VU pool sizing.
const pooledRPS = Number(__ENV.POOLED_RPS || 12);
const oneToOneRPS = Number(__ENV.ONE_TO_ONE_RPS || 12);
const preAllocatedVUs = Number(__ENV.PRE_ALLOCATED_VUS || 50);
const maxVUs = Number(__ENV.MAX_VUS || 400);
// Per-mode custom metrics: duration and TTFT trends (ms) plus non-2xx rates.
const pooledDurationMs = new Trend('openai_ws_pooled_duration_ms', true);
const oneToOneDurationMs = new Trend('openai_ws_one_to_one_duration_ms', true);
const pooledTTFTMs = new Trend('openai_ws_pooled_ttft_ms', true);
const oneToOneTTFTMs = new Trend('openai_ws_one_to_one_ttft_ms', true);
const pooledNon2xxRate = new Rate('openai_ws_pooled_non2xx_rate');
const oneToOneNon2xxRate = new Rate('openai_ws_one_to_one_non2xx_rate');
// k6 run configuration: both modes run at constant arrival rate; the 1:1
// scenario starts 5s later to stagger ramp-up.
export const options = {
  scenarios: {
    pooled_mode: {
      executor: 'constant-arrival-rate',
      exec: 'runPooledMode',
      rate: pooledRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs,
      maxVUs,
      tags: { mode: 'pooled' },
    },
    one_to_one_mode: {
      executor: 'constant-arrival-rate',
      exec: 'runOneToOneMode',
      rate: oneToOneRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs,
      maxVUs,
      tags: { mode: 'one_to_one' },
      // Offset so the two modes do not ramp up at the exact same instant.
      startTime: '5s',
    },
  },
  // Pass/fail gates; the 1:1 mode is allowed looser latency percentiles.
  thresholds: {
    openai_ws_pooled_non2xx_rate: ['rate<0.02'],
    openai_ws_one_to_one_non2xx_rate: ['rate<0.02'],
    openai_ws_pooled_duration_ms: ['p(95)<3000', 'p(99)<6000'],
    openai_ws_one_to_one_duration_ms: ['p(95)<6000', 'p(99)<10000'],
  },
};
// Common JSON request headers; a Bearer Authorization header is attached only
// when WS_API_KEY is configured.
function buildHeaders() {
  const base = {
    'Content-Type': 'application/json',
    'User-Agent': 'codex_cli_rs/0.98.0',
  };
  return wsAPIKey ? { ...base, Authorization: `Bearer ${wsAPIKey}` } : base;
}
// Serialises the fixed, unchained /v1/responses request body used by both modes.
function buildBody() {
  const request = {
    model,
    stream: false,
    input: [
      {
        role: 'user',
        content: [{ type: 'input_text', text: '请回复: pong' }],
      },
    ],
    max_output_tokens: 48,
  };
  return JSON.stringify(request);
}
// Sends one request to `${baseURL}/v1/responses` tagged with `mode`.
// Returns null when the mode's base URL is unset; otherwise the raw k6
// response after recording a 2xx check.
function send(baseURL, mode) {
  if (!baseURL) {
    return null;
  }
  const params = {
    headers: buildHeaders(),
    timeout,
    tags: { mode },
  };
  const res = http.post(`${baseURL}/v1/responses`, buildBody(), params);
  check(res, { 'status is 2xx': (r) => r.status >= 200 && r.status < 300 });
  return res;
}
// Pooled-mode scenario: one request per iteration against the pooled
// deployment, recording duration, TTFT (timings.waiting), and non-2xx rate.
export function runPooledMode() {
  const res = send(pooledBaseURL, 'pooled');
  if (!res) {
    // send() returns null only when the base URL is empty.
    return;
  }
  pooledDurationMs.add(res.timings.duration, { mode: 'pooled' });
  pooledTTFTMs.add(res.timings.waiting, { mode: 'pooled' });
  pooledNon2xxRate.add(res.status < 200 || res.status >= 300, { mode: 'pooled' });
}
// 1:1-mode scenario: no-ops entirely when ONE_TO_ONE_BASE_URL is unset
// (the guard here duplicates the one inside send(), harmlessly).
export function runOneToOneMode() {
  if (!oneToOneBaseURL) {
    return;
  }
  const res = send(oneToOneBaseURL, 'one_to_one');
  if (!res) {
    return;
  }
  oneToOneDurationMs.add(res.timings.duration, { mode: 'one_to_one' });
  oneToOneTTFTMs.add(res.timings.waiting, { mode: 'one_to_one' });
  oneToOneNon2xxRate.add(res.status < 200 || res.status >= 300, { mode: 'one_to_one' });
}
// End-of-run summary hook: prints pretty-printed metrics to stdout and writes
// the full summary as a JSON artifact under docs/perf/.
export function handleSummary(data) {
  const metricsJSON = JSON.stringify(data.metrics, null, 2);
  const fullJSON = JSON.stringify(data, null, 2);
  return {
    stdout: `\nOpenAI WS 池化 vs 1:1 对比压测完成\n${metricsJSON}\n`,
    'docs/perf/openai-ws-pooling-compare-summary.json': fullJSON,
  };
}

View File

@@ -0,0 +1,216 @@
import http from 'k6/http';
import { check, sleep } from 'k6';
import { Rate, Trend } from 'k6/metrics';
// Target endpoint; the hotspot scenario can use a dedicated API key and falls
// back to WS_API_KEY when WS_HOTSPOT_API_KEY is unset.
const baseURL = (__ENV.BASE_URL || 'http://127.0.0.1:5231').replace(/\/$/, '');
const wsAPIKey = (__ENV.WS_API_KEY || '').trim();
const wsHotspotAPIKey = (__ENV.WS_HOTSPOT_API_KEY || wsAPIKey).trim();
// Model under test, total run duration, and per-request timeout.
const model = __ENV.MODEL || 'gpt-5.3-codex';
const duration = __ENV.DURATION || '5m';
const timeout = __ENV.TIMEOUT || '180s';
// Arrival rates (requests/second) for the four scenarios plus default VU pools.
const shortRPS = Number(__ENV.SHORT_RPS || 12);
const longRPS = Number(__ENV.LONG_RPS || 4);
const errorRPS = Number(__ENV.ERROR_RPS || 2);
const hotspotRPS = Number(__ENV.HOTSPOT_RPS || 10);
const preAllocatedVUs = Number(__ENV.PRE_ALLOCATED_VUS || 50);
const maxVUs = Number(__ENV.MAX_VUS || 400);
// Custom metrics: duration and TTFT trends (TTFT approximated by time to
// first byte), error/done rates, and the expected-error rate for injection.
const reqDurationMs = new Trend('openai_ws_v2_perf_req_duration_ms', true);
const ttftMs = new Trend('openai_ws_v2_perf_ttft_ms', true);
const non2xxRate = new Rate('openai_ws_v2_perf_non2xx_rate');
const doneRate = new Rate('openai_ws_v2_perf_done_rate');
const expectedErrorRate = new Rate('openai_ws_v2_perf_expected_error_rate');
// k6 run configuration: four concurrent constant-arrival-rate scenarios.
// Scenario-specific VU pools are scaled from each scenario's RPS, with floors.
export const options = {
  scenarios: {
    // Short prompt, shared default VU pool.
    short_request: {
      executor: 'constant-arrival-rate',
      exec: 'runShortRequest',
      rate: shortRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs,
      maxVUs,
      tags: { scenario: 'short_request' },
    },
    // Large payload (many tools + messages); bigger per-RPS VU multipliers
    // because each request is slower.
    long_request: {
      executor: 'constant-arrival-rate',
      exec: 'runLongRequest',
      rate: longRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs: Math.max(20, Math.ceil(longRPS * 6)),
      maxVUs: Math.max(100, Math.ceil(longRPS * 20)),
      tags: { scenario: 'long_request' },
    },
    // Deliberately-broken requests (bad previous_response_id) at a low rate.
    error_injection: {
      executor: 'constant-arrival-rate',
      exec: 'runErrorInjection',
      rate: errorRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs: Math.max(8, Math.ceil(errorRPS * 4)),
      maxVUs: Math.max(40, Math.ceil(errorRPS * 12)),
      tags: { scenario: 'error_injection' },
    },
    // All traffic pinned to one fixed session/conversation id pair.
    hotspot_account: {
      executor: 'constant-arrival-rate',
      exec: 'runHotspotAccount',
      rate: hotspotRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs: Math.max(16, Math.ceil(hotspotRPS * 3)),
      maxVUs: Math.max(80, Math.ceil(hotspotRPS * 10)),
      tags: { scenario: 'hotspot_account' },
    },
  },
  // Pass/fail gates across all scenarios (error injection inflates non-2xx,
  // hence the looser 5% ceiling here).
  thresholds: {
    openai_ws_v2_perf_non2xx_rate: ['rate<0.05'],
    openai_ws_v2_perf_req_duration_ms: ['p(95)<5000', 'p(99)<9000'],
    openai_ws_v2_perf_ttft_ms: ['p(99)<2000'],
    openai_ws_v2_perf_done_rate: ['rate>0.95'],
  },
};
// Builds request headers for a /v1/responses call.
// Optional extras: Bearer auth (when apiKey is non-empty) and the session_id /
// conversation_id routing headers (when present in opts).
function buildHeaders(apiKey, opts = {}) {
  const headers = {
    'Content-Type': 'application/json',
    'User-Agent': 'codex_cli_rs/0.104.0',
    'OpenAI-Beta': 'responses_websockets=2026-02-06,responses=experimental',
  };
  if (apiKey) {
    Object.assign(headers, { Authorization: `Bearer ${apiKey}` });
  }
  if (opts.sessionID) {
    Object.assign(headers, { session_id: opts.sessionID });
  }
  if (opts.conversationID) {
    Object.assign(headers, { conversation_id: opts.conversationID });
  }
  return headers;
}
// Serialises the minimal short-scenario request body.
function shortBody() {
  const request = {
    model,
    stream: false,
    input: [
      {
        role: 'user',
        content: [{ type: 'input_text', text: '请回复一个词pong' }],
      },
    ],
    max_output_tokens: 64,
  };
  return JSON.stringify(request);
}
// Serialises a deliberately large request: 28 identical function-tool schemas
// plus 20 user messages, to exercise big-payload handling.
function longBody() {
  const tools = Array.from({ length: 28 }, (_, i) => ({
    type: 'function',
    name: `perf_tool_${i}`,
    description: 'load test tool schema',
    parameters: {
      type: 'object',
      properties: {
        query: { type: 'string' },
        limit: { type: 'number' },
        with_cache: { type: 'boolean' },
      },
      required: ['query'],
    },
  }));
  const input = Array.from({ length: 20 }, (_, i) => ({
    role: 'user',
    content: [{ type: 'input_text', text: `长请求压测消息 ${i}: 请输出简要摘要。` }],
  }));
  return JSON.stringify({
    model,
    stream: false,
    input,
    tools,
    parallel_tool_calls: true,
    max_output_tokens: 256,
    reasoning: { effort: 'medium' },
    instructions: '你是压测助手,简洁回复。',
  });
}
// Serialises a request that references a non-existent previous response id
// (unique per VU/iteration) so the server exercises its error path.
function errorInjectionBody() {
  const request = {
    model,
    stream: false,
    previous_response_id: `resp_not_found_${__VU}_${__ITER}`,
    input: [
      {
        role: 'user',
        content: [{ type: 'input_text', text: '触发错误注入路径。' }],
      },
    ],
  };
  return JSON.stringify(request);
}
// POSTs `body` to /v1/responses and records the shared metrics (duration,
// TTFT via timings.waiting, non-2xx rate) under the caller's tags before
// returning the raw k6 response.
function postResponses(apiKey, body, tags, opts = {}) {
  const params = {
    headers: buildHeaders(apiKey, opts),
    timeout,
    tags,
  };
  const res = http.post(`${baseURL}/v1/responses`, body, params);
  reqDurationMs.add(res.timings.duration, tags);
  ttftMs.add(res.timings.waiting, tags);
  non2xxRate.add(res.status < 200 || res.status >= 300, tags);
  return res;
}
// True when the response body contains the '[DONE]' stream terminator marker.
function hasDone(res) {
  if (!res || !res.body) {
    return false;
  }
  return res.body.includes('[DONE]');
}
// Short-request scenario: a round counts as 'done' when the body carries the
// '[DONE]' marker or the status is 2xx (non-streaming responses have no marker).
export function runShortRequest() {
  const tags = { scenario: 'short_request' };
  const res = postResponses(wsAPIKey, shortBody(), tags);
  check(res, { 'short status is 2xx': (r) => r.status >= 200 && r.status < 300 });
  doneRate.add(hasDone(res) || (res.status >= 200 && res.status < 300), tags);
}
// Long-request scenario: same flow as the short scenario but with the large
// tools+messages payload from longBody().
export function runLongRequest() {
  const tags = { scenario: 'long_request' };
  const res = postResponses(wsAPIKey, longBody(), tags);
  check(res, { 'long status is 2xx': (r) => r.status >= 200 && r.status < 300 });
  doneRate.add(hasDone(res) || (res.status >= 200 && res.status < 300), tags);
}
// Error-injection scenario.
export function runErrorInjection() {
  const tags = { scenario: 'error_injection' };
  const res = postResponses(wsAPIKey, errorInjectionBody(), tags);
  // 4xx/5xx responses are allowed here; the focus is observing the fallback
  // and error-path behavior (jitter) rather than success rate.
  expectedErrorRate.add(res.status >= 400, tags);
  doneRate.add(hasDone(res), tags);
}
// Hotspot-account scenario: every iteration reuses one fixed session id and
// conversation id pair, concentrating load on a single logical account.
export function runHotspotAccount() {
  const tags = { scenario: 'hotspot_account' };
  const opts = {
    sessionID: 'perf-hotspot-session-fixed',
    conversationID: 'perf-hotspot-conversation-fixed',
  };
  const res = postResponses(wsHotspotAPIKey, shortBody(), tags, opts);
  check(res, { 'hotspot status is 2xx': (r) => r.status >= 200 && r.status < 300 });
  doneRate.add(hasDone(res) || (res.status >= 200 && res.status < 300), tags);
  // Brief pause before the VU is released back to the pool.
  sleep(0.01);
}
// End-of-run summary hook: prints pretty-printed metrics to stdout and writes
// the full summary as a JSON artifact under docs/perf/.
export function handleSummary(data) {
  const metricsJSON = JSON.stringify(data.metrics, null, 2);
  const fullJSON = JSON.stringify(data, null, 2);
  return {
    stdout: `\nOpenAI WSv2 性能套件压测完成\n${metricsJSON}\n`,
    'docs/perf/openai-ws-v2-perf-suite-summary.json': fullJSON,
  };
}

192
tools/sora-test Executable file
View File

@@ -0,0 +1,192 @@
#!/usr/bin/env python3
"""
Sora access token tester.
Usage:
tools/sora-test -at "<ACCESS_TOKEN>"
"""
from __future__ import annotations
import argparse
import base64
import json
import sys
import textwrap
import urllib.error
import urllib.request
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Dict, Optional, Tuple
# Default Sora backend origin, HTTP timeout (seconds), and the Android client
# User-Agent string sent with every request.
DEFAULT_BASE_URL = "https://sora.chatgpt.com"
DEFAULT_TIMEOUT = 20
DEFAULT_USER_AGENT = "Sora/1.2026.007 (Android 15; 24122RKC7C; build 2600700)"


@dataclass
class EndpointResult:
    """Outcome of probing a single backend endpoint."""

    path: str  # endpoint path that was requested
    status: int  # HTTP status code; 0 when the request never completed
    request_id: str  # x-request-id response header (may be empty)
    cf_ray: str  # cf-ray response header (may be empty)
    body_preview: str  # first 500 chars of the body, newlines flattened
def parse_args() -> argparse.Namespace:
    """Parse command-line options for the Sora access-token tester."""
    epilog_text = textwrap.dedent(
        """\
        Examples:
          tools/sora-test -at "eyJhbGciOi..."
          tools/sora-test -at "eyJhbGciOi..." --timeout 30
        """
    )
    arg_parser = argparse.ArgumentParser(
        description="Test Sora access token against core backend endpoints.",
        formatter_class=argparse.RawTextHelpFormatter,
        epilog=epilog_text,
    )
    arg_parser.add_argument("-at", "--access-token", required=True, help="Sora/OpenAI access token (JWT)")
    arg_parser.add_argument(
        "--base-url",
        default=DEFAULT_BASE_URL,
        help=f"Base URL for Sora backend (default: {DEFAULT_BASE_URL})",
    )
    arg_parser.add_argument(
        "--timeout",
        type=int,
        default=DEFAULT_TIMEOUT,
        help=f"HTTP timeout seconds (default: {DEFAULT_TIMEOUT})",
    )
    return arg_parser.parse_args()
def mask_token(token: str) -> str:
    """Return a log-safe form of the token: head...tail for long values."""
    if len(token) > 16:
        return f"{token[:10]}...{token[-6:]}"
    return token
def decode_jwt_payload(token: str) -> Optional[Dict]:
    """Decode the payload (middle segment) of a JWT without verifying it.

    Returns the payload as a dict, or None when the token is not a
    three-part JWT or the segment is not valid base64url/JSON.
    """
    parts = token.split(".")
    if len(parts) != 3:
        return None
    payload = parts[1]
    # JWT segments are transmitted without '=' padding; restore it first.
    payload += "=" * ((4 - len(payload) % 4) % 4)
    try:
        # urlsafe_b64decode handles the '-'/'_' alphabet directly, replacing
        # the previous hand-rolled character translation.
        decoded = base64.urlsafe_b64decode(payload)
        return json.loads(decoded.decode("utf-8", errors="replace"))
    except Exception:
        # Any decode/parse failure means "not a usable JWT payload".
        return None
def ts_to_iso(ts: Optional[int]) -> str:
    """Format a unix timestamp as UTC ISO-8601; '-' for missing/invalid values."""
    if not ts:
        return "-"
    try:
        moment = datetime.fromtimestamp(ts, tz=timezone.utc)
    except Exception:
        return "-"
    return moment.isoformat()
def http_get(base_url: str, path: str, access_token: str, timeout: int) -> EndpointResult:
    """GET base_url+path with bearer auth; never raises.

    HTTP errors and network failures are folded into the returned
    EndpointResult (status 0 means the request never reached the server).
    """
    url = base_url.rstrip("/") + path
    req = urllib.request.Request(url=url, method="GET")
    for name, value in (
        ("Authorization", f"Bearer {access_token}"),
        ("Accept", "application/json, text/plain, */*"),
        ("Origin", DEFAULT_BASE_URL),
        ("Referer", DEFAULT_BASE_URL + "/"),
        ("User-Agent", DEFAULT_USER_AGENT),
    ):
        req.add_header(name, value)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            text = resp.read().decode("utf-8", errors="replace")
            return EndpointResult(
                path=path,
                status=resp.getcode(),
                request_id=(resp.headers.get("x-request-id") or "").strip(),
                cf_ray=(resp.headers.get("cf-ray") or "").strip(),
                body_preview=text[:500].replace("\n", " "),
            )
    except urllib.error.HTTPError as e:
        # Non-2xx still carries useful headers/body for diagnosis.
        text = e.read().decode("utf-8", errors="replace")
        return EndpointResult(
            path=path,
            status=e.code,
            request_id=(e.headers.get("x-request-id") if e.headers else "") or "",
            cf_ray=(e.headers.get("cf-ray") if e.headers else "") or "",
            body_preview=text[:500].replace("\n", " "),
        )
    except Exception as e:
        # DNS/TLS/proxy/timeout failures: report status 0 with the error text.
        return EndpointResult(
            path=path,
            status=0,
            request_id="",
            cf_ray="",
            body_preview=f"network_error: {e}",
        )
def classify(me_status: int) -> Tuple[str, int]:
    """Map the /backend/me status code to (human summary, process exit code)."""
    known = {
        200: ("AT looks valid for Sora (/backend/me == 200).", 0),
        401: ("AT is invalid or expired (/backend/me == 401).", 2),
        403: ("AT may be blocked by policy/challenge or lacks permission (/backend/me == 403).", 3),
        0: ("Request failed before reaching Sora (network/proxy/TLS issue).", 4),
    }
    if me_status in known:
        return known[me_status]
    return f"Unexpected status on /backend/me: {me_status}", 5
def main() -> int:
    """Run the token test: decode the JWT locally, probe endpoints, summarize.

    Returns a process exit code: 0 valid, 1 empty token, 2 invalid/expired,
    3 blocked/forbidden, 4 network failure, 5 unexpected status (see classify).
    """
    args = parse_args()
    token = args.access_token.strip()
    if not token:
        print("ERROR: empty access token")
        return 1
    payload = decode_jwt_payload(token)
    print("=== Sora AT Test ===")
    print(f"token: {mask_token(token)}")
    if payload:
        exp = payload.get("exp")
        iat = payload.get("iat")
        scopes = payload.get("scp")
        # `scp` is presumably a list of scope strings; anything else counts as 0.
        scope_count = len(scopes) if isinstance(scopes, list) else 0
        print(f"jwt.iat: {iat} ({ts_to_iso(iat)})")
        print(f"jwt.exp: {exp} ({ts_to_iso(exp)})")
        print(f"jwt.scope_count: {scope_count}")
    else:
        print("jwt: payload decode failed (token may not be JWT)")
    # Endpoints probed in order; only /backend/me drives the final verdict.
    endpoints = [
        "/backend/me",
        "/backend/nf/check",
        "/backend/project_y/invite/mine",
        "/backend/billing/subscriptions",
    ]
    print("\n--- endpoint checks ---")
    results = []
    for path in endpoints:
        res = http_get(args.base_url, path, token, args.timeout)
        results.append(res)
        print(f"{res.path} -> status={res.status} request_id={res.request_id or '-'} cf_ray={res.cf_ray or '-'}")
        if res.body_preview:
            print(f"  body: {res.body_preview}")
    me_result = next((r for r in results if r.path == "/backend/me"), None)
    me_status = me_result.status if me_result else 0
    summary, code = classify(me_status)
    print("\n--- summary ---")
    print(summary)
    return code


if __name__ == "__main__":
    sys.exit(main())