feat(openai): 极致优化 OAuth 链路并补齐性能守护
- 优化 /v1/responses 热路径,减少重复解析与不必要拷贝
- 优化并发与 token 竞争路径并补齐运行指标
- 补充 OpenAI/Ops 相关单元测试与回归用例
- 新增灰度阈值守护与压测脚本,支撑发布验收
This commit is contained in:
164
tools/perf/openai_oauth_gray_drill.py
Executable file
164
tools/perf/openai_oauth_gray_drill.py
Executable file
@@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env python3
|
||||
"""OpenAI OAuth 灰度发布演练脚本(本地模拟)。
|
||||
|
||||
该脚本会启动本地 mock Ops API,调用 openai_oauth_gray_guard.py,
|
||||
验证以下场景:
|
||||
1) A/B/C/D 四个灰度批次均通过
|
||||
2) 注入异常场景触发阈值告警并返回退出码 2(模拟自动回滚触发)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import threading
|
||||
from dataclasses import dataclass
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
from pathlib import Path
|
||||
from typing import Dict, Tuple
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
# Repository root: this file lives under tools/perf/, so two parents up.
ROOT = Path(__file__).resolve().parents[2]
GUARD_SCRIPT = ROOT / "tools" / "perf" / "openai_oauth_gray_guard.py"
REPORT_PATH = ROOT / "docs" / "perf" / "openai-oauth-gray-drill-report.md"


# Threshold configuration served by the mocked Ops settings endpoint.
THRESHOLDS = {
    "sla_percent_min": 99.5,
    "ttft_p99_ms_max": 900,
    "request_error_rate_percent_max": 2.0,
    "upstream_error_rate_percent_max": 2.0,
}

# Per-stage dashboard snapshots. Stages A-D all sit inside THRESHOLDS;
# "rollback" deliberately violates every limit to exercise the exit-code-2 path.
STAGE_SNAPSHOTS: Dict[str, Dict[str, float]] = {
    "A": {"sla": 99.78, "ttft": 780, "error_rate": 1.20, "upstream_error_rate": 1.05},
    "B": {"sla": 99.82, "ttft": 730, "error_rate": 1.05, "upstream_error_rate": 0.92},
    "C": {"sla": 99.86, "ttft": 680, "error_rate": 0.88, "upstream_error_rate": 0.80},
    "D": {"sla": 99.89, "ttft": 640, "error_rate": 0.72, "upstream_error_rate": 0.67},
    "rollback": {"sla": 97.10, "ttft": 1550, "error_rate": 6.30, "upstream_error_rate": 5.60},
}
|
||||
|
||||
|
||||
class _MockHandler(BaseHTTPRequestHandler):
    """Minimal in-process mock of the two Ops API endpoints the guard calls."""

    def _write_json(self, payload: dict) -> None:
        """Serialize *payload* and send it back as an HTTP 200 JSON response."""
        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format: str, *args):  # noqa: A003
        """Suppress the default per-request stderr access log."""
        return

    def do_GET(self):  # noqa: N802
        """Route GET requests to the mocked thresholds / overview endpoints."""
        parsed = urlparse(self.path)
        request_path = parsed.path

        if request_path.endswith("/api/v1/admin/ops/settings/metric-thresholds"):
            self._write_json({"code": 0, "message": "success", "data": THRESHOLDS})
            return

        if request_path.endswith("/api/v1/admin/ops/dashboard/overview"):
            # group_id selects the gray stage; unknown/missing falls back to "A".
            params = parse_qs(parsed.query)
            stage = (params.get("group_id") or ["A"])[0]
            snap = STAGE_SNAPSHOTS.get(stage, STAGE_SNAPSHOTS["A"])
            payload = {
                "code": 0,
                "message": "success",
                "data": {
                    "sla": snap["sla"],
                    "error_rate": snap["error_rate"],
                    "upstream_error_rate": snap["upstream_error_rate"],
                    "ttft": {"p99_ms": snap["ttft"]},
                },
            }
            self._write_json(payload)
            return

        # Anything else is not part of the mock surface.
        self.send_response(404)
        self.end_headers()
||||
|
||||
|
||||
def run_guard(base_url: str, stage: str) -> Tuple[int, str]:
    """Run the gray-guard script once for one gray stage.

    Args:
        base_url: Base URL of the (mock) Ops API.
        stage: Gray stage id, forwarded as --group-id.

    Returns:
        (exit_code, combined stdout+stderr of the guard process).
    """
    import sys  # local import: only needed to locate the current interpreter

    # Use sys.executable instead of a bare "python": on many systems only
    # "python3" exists on PATH, and this also guarantees the guard runs under
    # the same interpreter (and venv) as the drill itself.
    cmd = [
        sys.executable,
        str(GUARD_SCRIPT),
        "--base-url",
        base_url,
        "--platform",
        "openai",
        "--time-range",
        "30m",
        "--group-id",
        stage,
    ]
    proc = subprocess.run(cmd, cwd=str(ROOT), capture_output=True, text=True)
    output = (proc.stdout + "\n" + proc.stderr).strip()
    return proc.returncode, output
|
||||
|
||||
|
||||
def main() -> int:
    """Run the local gray-release drill and write a markdown report.

    Starts an in-process mock Ops API, runs the guard script once per gray
    batch (A/B/C/D) and once against an injected-failure snapshot, then
    writes the results to REPORT_PATH.

    Returns:
        0 when all batches pass AND the failure injection yields exit code 2
        (i.e. the rollback condition is correctly triggered); 1 otherwise.
    """
    # Port 0 lets the OS pick a free port, so parallel runs cannot collide.
    server = HTTPServer(("127.0.0.1", 0), _MockHandler)
    host, port = server.server_address
    base_url = f"http://{host}:{port}"

    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()

    try:
        lines = [
            "# OpenAI OAuth 灰度守护演练报告",
            "",
            "> 类型:本地 mock 演练(用于验证灰度守护与回滚触发机制)",
            # Plain string: the original used an f-prefix with no placeholders.
            "> 生成脚本:`tools/perf/openai_oauth_gray_drill.py`",
            "",
            "## 1. 灰度批次结果(6.1)",
            "",
            "| 批次 | 流量比例 | 守护脚本退出码 | 结果 |",
            "|---|---:|---:|---|",
        ]

        batch_plan = [("A", "5%"), ("B", "20%"), ("C", "50%"), ("D", "100%")]
        all_pass = True
        for stage, ratio in batch_plan:
            code, _ = run_guard(base_url, stage)
            ok = code == 0
            all_pass = all_pass and ok
            lines.append(f"| {stage} | {ratio} | {code} | {'通过' if ok else '失败'} |")

        lines.extend([
            "",
            "## 2. 回滚触发演练(6.2)",
            "",
        ])

        # Exit code 2 is the guard's "metrics breached, stop and roll back" signal.
        rollback_code, rollback_output = run_guard(base_url, "rollback")
        rollback_triggered = rollback_code == 2
        lines.append(f"- 注入异常场景退出码:`{rollback_code}`")
        lines.append(f"- 是否触发回滚条件:`{'是' if rollback_triggered else '否'}`")
        lines.append("- 关键信息摘录:")
        excerpt = "\n".join(rollback_output.splitlines()[:8])
        lines.append("```text")
        lines.append(excerpt)
        lines.append("```")

        lines.extend([
            "",
            "## 3. 验收结论(6.3)",
            "",
            f"- 批次灰度结果:`{'通过' if all_pass else '不通过'}`",
            f"- 回滚触发机制:`{'通过' if rollback_triggered else '不通过'}`",
            f"- 结论:`{'通过(可进入真实环境灰度)' if all_pass and rollback_triggered else '不通过(需修复后复测)'}`",
        ])

        REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)
        REPORT_PATH.write_text("\n".join(lines) + "\n", encoding="utf-8")
    finally:
        # Always release the listening socket and worker thread, even when a
        # guard run raises — the original leaked them on any exception.
        server.shutdown()
        server.server_close()

    print(f"drill report generated: {REPORT_PATH}")
    return 0 if all_pass and rollback_triggered else 1


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
213
tools/perf/openai_oauth_gray_guard.py
Executable file
213
tools/perf/openai_oauth_gray_guard.py
Executable file
@@ -0,0 +1,213 @@
|
||||
#!/usr/bin/env python3
|
||||
"""OpenAI OAuth 灰度阈值守护脚本。
|
||||
|
||||
用途:
|
||||
- 拉取 Ops 指标阈值配置与 Dashboard Overview 实时数据
|
||||
- 对比 P99 TTFT / 错误率 / SLA
|
||||
- 作为 6.2 灰度守护的自动化门禁(退出码可直接用于 CI/CD)
|
||||
|
||||
退出码:
|
||||
- 0: 指标通过
|
||||
- 1: 请求失败/参数错误
|
||||
- 2: 指标超阈值(建议停止扩量并回滚)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
class GuardThresholds:
    """Threshold limits pulled from the Ops settings endpoint.

    Every field may be None, meaning "no limit configured"; a missing limit
    simply disables the corresponding check.
    """

    sla_percent_min: Optional[float]
    ttft_p99_ms_max: Optional[float]
    request_error_rate_percent_max: Optional[float]
    upstream_error_rate_percent_max: Optional[float]
||||
|
||||
|
||||
@dataclass
class GuardSnapshot:
    """One observed metrics snapshot from the dashboard overview endpoint.

    Fields are None when the API did not return a usable numeric value.
    """

    sla: Optional[float]
    ttft_p99_ms: Optional[float]
    request_error_rate_percent: Optional[float]
    upstream_error_rate_percent: Optional[float]
||||
|
||||
|
||||
def build_headers(token: str) -> Dict[str, str]:
    """Build request headers; attach a Bearer token only when one is given."""
    cleaned = token.strip()
    headers: Dict[str, str] = {"Accept": "application/json"}
    if cleaned:
        headers["Authorization"] = f"Bearer {cleaned}"
    return headers
|
||||
|
||||
|
||||
def request_json(url: str, headers: Dict[str, str]) -> Dict[str, Any]:
    """GET *url* and decode the JSON response body.

    Raises:
        RuntimeError: on an HTTP error status (message includes code + body)
            or on a connection-level failure; the original cause is chained.
    """
    request = urllib.request.Request(url=url, method="GET", headers=headers)
    try:
        with urllib.request.urlopen(request, timeout=15) as response:
            body = response.read().decode("utf-8")
    except urllib.error.HTTPError as err:
        detail = err.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"HTTP {err.code}: {detail}") from err
    except urllib.error.URLError as err:
        raise RuntimeError(f"request failed: {err}") from err
    return json.loads(body)
|
||||
|
||||
|
||||
def parse_envelope_data(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Unwrap the standard ``{code, message, data}`` API envelope.

    Raises:
        RuntimeError: when the payload is not a dict, ``code`` is non-zero,
            or ``data`` is not a dict.
    """
    if not isinstance(payload, dict):
        raise RuntimeError("invalid response payload")
    code = payload.get("code")
    if code != 0:
        raise RuntimeError(f"api error: code={code} message={payload.get('message')}")
    data = payload.get("data")
    if not isinstance(data, dict):
        raise RuntimeError("invalid response data")
    return data
|
||||
|
||||
|
||||
def parse_thresholds(data: Dict[str, Any]) -> GuardThresholds:
    """Map the raw thresholds payload onto GuardThresholds.

    Missing or non-numeric fields become None (check disabled).
    """
    keys = (
        "sla_percent_min",
        "ttft_p99_ms_max",
        "request_error_rate_percent_max",
        "upstream_error_rate_percent_max",
    )
    sla_min, ttft_max, req_err_max, up_err_max = (
        to_float_or_none(data.get(key)) for key in keys
    )
    return GuardThresholds(sla_min, ttft_max, req_err_max, up_err_max)
|
||||
|
||||
|
||||
def parse_snapshot(data: Dict[str, Any]) -> GuardSnapshot:
    """Map the dashboard overview payload onto GuardSnapshot.

    Missing or non-numeric fields become None; ``ttft`` must be a nested
    dict carrying ``p99_ms``, anything else is treated as absent.
    """
    ttft_block = data.get("ttft")
    if not isinstance(ttft_block, dict):
        ttft_block = {}
    return GuardSnapshot(
        sla=to_float_or_none(data.get("sla")),
        ttft_p99_ms=to_float_or_none(ttft_block.get("p99_ms")),
        request_error_rate_percent=to_float_or_none(data.get("error_rate")),
        upstream_error_rate_percent=to_float_or_none(data.get("upstream_error_rate")),
    )
|
||||
|
||||
|
||||
def to_float_or_none(v: Any) -> Optional[float]:
    """Best-effort float conversion: None in, or an unparseable value, gives None."""
    try:
        return None if v is None else float(v)
    except (TypeError, ValueError):
        return None
|
||||
|
||||
|
||||
def evaluate(snapshot: GuardSnapshot, thresholds: GuardThresholds) -> List[str]:
    """Compare an observed snapshot against configured thresholds.

    A check is skipped when either its threshold or its observed value is
    None. Returns human-readable violation messages in fixed order:
    SLA, TTFT P99, request error rate, upstream error rate.
    """
    # (label, observed value, limit, unit, breached-when predicate)
    checks = [
        ("SLA 低于阈值", snapshot.sla, thresholds.sla_percent_min, "%",
         lambda actual, limit: actual < limit),
        ("TTFT P99 超阈值", snapshot.ttft_p99_ms, thresholds.ttft_p99_ms_max, "ms",
         lambda actual, limit: actual > limit),
        ("请求错误率超阈值", snapshot.request_error_rate_percent,
         thresholds.request_error_rate_percent_max, "%",
         lambda actual, limit: actual > limit),
        ("上游错误率超阈值", snapshot.upstream_error_rate_percent,
         thresholds.upstream_error_rate_percent_max, "%",
         lambda actual, limit: actual > limit),
    ]

    violations: List[str] = []
    for label, actual, limit, unit, breached in checks:
        if limit is None or actual is None:
            continue
        if breached(actual, limit):
            violations.append(
                f"{label}: actual={actual:.2f}{unit} threshold={limit:.2f}{unit}"
            )
    return violations
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry: fetch thresholds + live snapshot, evaluate, and gate.

    Exit codes: 0 metrics pass, 1 request/parse failure, 2 thresholds
    violated (stop ramping and roll back).
    """
    parser = argparse.ArgumentParser(description="OpenAI OAuth 灰度阈值守护")
    parser.add_argument("--base-url", required=True, help="服务地址,例如 http://127.0.0.1:5231")
    parser.add_argument("--admin-token", default="", help="Admin JWT(可选,按部署策略)")
    parser.add_argument("--platform", default="openai", help="平台过滤,默认 openai")
    parser.add_argument("--time-range", default="30m", help="时间窗口: 5m/30m/1h/6h/24h/7d/30d")
    parser.add_argument("--group-id", default="", help="可选 group_id")
    args = parser.parse_args()

    base = args.base_url.rstrip("/")
    headers = build_headers(args.admin_token)

    try:
        # 1) Threshold configuration from Ops settings.
        thresholds_payload = request_json(
            f"{base}/api/v1/admin/ops/settings/metric-thresholds", headers
        )
        thresholds = parse_thresholds(parse_envelope_data(thresholds_payload))

        # 2) Live metrics snapshot from the dashboard overview.
        params = {"platform": args.platform, "time_range": args.time_range}
        group_id = args.group_id.strip()
        if group_id:
            params["group_id"] = group_id
        overview_url = (
            f"{base}/api/v1/admin/ops/dashboard/overview?"
            + urllib.parse.urlencode(params)
        )
        snapshot = parse_snapshot(parse_envelope_data(request_json(overview_url, headers)))

        print("[OpenAI OAuth Gray Guard] 当前快照:")
        print(
            json.dumps(
                {
                    "sla": snapshot.sla,
                    "ttft_p99_ms": snapshot.ttft_p99_ms,
                    "request_error_rate_percent": snapshot.request_error_rate_percent,
                    "upstream_error_rate_percent": snapshot.upstream_error_rate_percent,
                },
                ensure_ascii=False,
                indent=2,
            )
        )
        print("[OpenAI OAuth Gray Guard] 阈值配置:")
        print(
            json.dumps(
                {
                    "sla_percent_min": thresholds.sla_percent_min,
                    "ttft_p99_ms_max": thresholds.ttft_p99_ms_max,
                    "request_error_rate_percent_max": thresholds.request_error_rate_percent_max,
                    "upstream_error_rate_percent_max": thresholds.upstream_error_rate_percent_max,
                },
                ensure_ascii=False,
                indent=2,
            )
        )

        # 3) Gate on the comparison.
        violations = evaluate(snapshot, thresholds)
        if violations:
            print("[OpenAI OAuth Gray Guard] 检测到阈值违例:")
            for idx, line in enumerate(violations, start=1):
                print(f"  {idx}. {line}")
            print("[OpenAI OAuth Gray Guard] 建议:停止扩量并执行回滚。")
            return 2

        print("[OpenAI OAuth Gray Guard] 指标通过,可继续观察或按计划扩量。")
        return 0

    except Exception as exc:  # top-level CLI boundary: report and exit 1
        print(f"[OpenAI OAuth Gray Guard] 执行失败: {exc}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
122
tools/perf/openai_oauth_responses_k6.js
Normal file
122
tools/perf/openai_oauth_responses_k6.js
Normal file
@@ -0,0 +1,122 @@
|
||||
import http from 'k6/http';
|
||||
import { check } from 'k6';
|
||||
import { Rate, Trend } from 'k6/metrics';
|
||||
|
||||
// --- Target & auth configuration (overridable via environment) ---
const baseURL = __ENV.BASE_URL || 'http://127.0.0.1:5231';
const apiKey = __ENV.API_KEY || '';
const model = __ENV.MODEL || 'gpt-5';
const timeout = __ENV.TIMEOUT || '180s';

// --- Load-shape knobs shared by both scenarios ---
const nonStreamRPS = Number(__ENV.NON_STREAM_RPS || 8);
const streamRPS = Number(__ENV.STREAM_RPS || 4);
const duration = __ENV.DURATION || '3m';
const preAllocatedVUs = Number(__ENV.PRE_ALLOCATED_VUS || 30);
const maxVUs = Number(__ENV.MAX_VUS || 200);

// --- Custom metrics (second Trend arg marks values as durations) ---
const reqDurationMs = new Trend('openai_oauth_req_duration_ms', true);
const ttftMs = new Trend('openai_oauth_ttft_ms', true);
const non2xxRate = new Rate('openai_oauth_non2xx_rate');
const streamDoneRate = new Rate('openai_oauth_stream_done_rate');
||||
|
||||
/** Build one constant-arrival-rate scenario sharing the global sizing knobs. */
function arrivalScenario(rate, execName, requestType) {
  return {
    executor: 'constant-arrival-rate',
    rate,
    timeUnit: '1s',
    duration,
    preAllocatedVUs,
    maxVUs,
    exec: execName,
    tags: { request_type: requestType },
  };
}

export const options = {
  scenarios: {
    non_stream: arrivalScenario(nonStreamRPS, 'runNonStream', 'non_stream'),
    stream: arrivalScenario(streamRPS, 'runStream', 'stream'),
  },
  thresholds: {
    openai_oauth_non2xx_rate: ['rate<0.01'],
    openai_oauth_req_duration_ms: ['p(95)<3000', 'p(99)<6000'],
    openai_oauth_ttft_ms: ['p(99)<1200'],
    openai_oauth_stream_done_rate: ['rate>0.99'],
  },
};
|
||||
|
||||
/**
 * Request headers for /v1/responses; Authorization is attached only
 * when an API_KEY was supplied via the environment.
 */
function buildHeaders() {
  const base = {
    'Content-Type': 'application/json',
    'User-Agent': 'codex_cli_rs/0.1.0',
  };
  return apiKey ? { ...base, Authorization: `Bearer ${apiKey}` } : base;
}
|
||||
|
||||
/**
 * JSON body for a minimal /v1/responses call: a tiny fixed prompt with a
 * capped output so each request is cheap and comparable.
 * @param {boolean} stream whether to request streaming output
 */
function buildBody(stream) {
  const userTurn = {
    role: 'user',
    content: [
      {
        type: 'input_text',
        text: '请返回一句极短的话:pong',
      },
    ],
  };
  return JSON.stringify({
    model,
    stream,
    input: [userTurn],
    max_output_tokens: 32,
  });
}
|
||||
|
||||
/**
 * Fold one response into the custom metrics; streaming responses also
 * record whether the SSE terminator ([DONE]) was observed in the body.
 */
function recordMetrics(res, stream) {
  const tags = { request_type: stream ? 'stream' : 'non_stream' };
  reqDurationMs.add(res.timings.duration, tags);
  ttftMs.add(res.timings.waiting, tags);
  non2xxRate.add(res.status < 200 || res.status >= 300, tags);

  if (stream) {
    const sawDone = !!res.body && res.body.indexOf('[DONE]') >= 0;
    streamDoneRate.add(sawDone, { request_type: 'stream' });
  }
}
|
||||
|
||||
/**
 * Issue one POST to /v1/responses, check the status, record metrics,
 * and return the raw k6 response.
 */
function postResponses(stream) {
  const requestType = stream ? 'stream' : 'non_stream';
  const res = http.post(`${baseURL}/v1/responses`, buildBody(stream), {
    headers: buildHeaders(),
    timeout,
    tags: { endpoint: '/v1/responses', request_type: requestType },
  });

  check(res, {
    'status is 2xx': (r) => r.status >= 200 && r.status < 300,
  });

  recordMetrics(res, stream);
  return res;
}
|
||||
|
||||
/** Scenario entry point: one non-streaming request per iteration. */
export function runNonStream() {
  postResponses(false);
}

/** Scenario entry point: one streaming request per iteration. */
export function runStream() {
  postResponses(true);
}
|
||||
|
||||
/**
 * End-of-test summary: print metrics to stdout and persist the full
 * k6 summary JSON for later comparison against baselines.
 */
export function handleSummary(data) {
  const metricsDump = JSON.stringify(data.metrics, null, 2);
  return {
    stdout: `\nOpenAI OAuth /v1/responses 基线完成\n${metricsDump}\n`,
    'docs/perf/openai-oauth-k6-summary.json': JSON.stringify(data, null, 2),
  };
}
|
||||
Reference in New Issue
Block a user