feat(openai): 极致优化 OAuth 链路并补齐性能守护

- 优化 /v1/responses 热路径,减少重复解析与不必要拷贝
- 优化并发与 token 竞争路径并补齐运行指标
- 补充 OpenAI/Ops 相关单元测试与回归用例
- 新增灰度阈值守护与压测脚本,支撑发布验收
This commit is contained in:
yangjianbo
2026-02-12 09:41:37 +08:00
parent a88bb8684f
commit 61a2bf469a
16 changed files with 1519 additions and 135 deletions

View File

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""OpenAI OAuth 灰度发布演练脚本(本地模拟)。
该脚本会启动本地 mock Ops API调用 openai_oauth_gray_guard.py
验证以下场景:
1) A/B/C/D 四个灰度批次均通过
2) 注入异常场景触发阈值告警并返回退出码 2模拟自动回滚触发
"""
from __future__ import annotations

import json
import subprocess
import sys
import threading
from dataclasses import dataclass
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from typing import Dict, Tuple
from urllib.parse import parse_qs, urlparse
# Repository root: two directories above this script (tools/perf/ -> repo root).
ROOT = Path(__file__).resolve().parents[2]
# The gray-guard script that this drill exercises as a subprocess.
GUARD_SCRIPT = ROOT / "tools" / "perf" / "openai_oauth_gray_guard.py"
# Destination of the generated markdown drill report.
REPORT_PATH = ROOT / "docs" / "perf" / "openai-oauth-gray-drill-report.md"
# Threshold config served by the mock Ops API; keys mirror the real
# metric-thresholds endpoint that the guard script consumes.
THRESHOLDS = {
    "sla_percent_min": 99.5,
    "ttft_p99_ms_max": 900,
    "request_error_rate_percent_max": 2.0,
    "upstream_error_rate_percent_max": 2.0,
}
# Per-stage dashboard snapshots served by the mock. Stages A-D stay within
# every threshold above; "rollback" deliberately violates all four limits
# so the guard must exit with code 2 (rollback trigger).
STAGE_SNAPSHOTS: Dict[str, Dict[str, float]] = {
    "A": {"sla": 99.78, "ttft": 780, "error_rate": 1.20, "upstream_error_rate": 1.05},
    "B": {"sla": 99.82, "ttft": 730, "error_rate": 1.05, "upstream_error_rate": 0.92},
    "C": {"sla": 99.86, "ttft": 680, "error_rate": 0.88, "upstream_error_rate": 0.80},
    "D": {"sla": 99.89, "ttft": 640, "error_rate": 0.72, "upstream_error_rate": 0.67},
    "rollback": {"sla": 97.10, "ttft": 1550, "error_rate": 6.30, "upstream_error_rate": 5.60},
}
class _MockHandler(BaseHTTPRequestHandler):
    """Mock Ops API handler: serves threshold config and per-stage overview data."""

    def _write_json(self, payload: dict) -> None:
        # Serialize once so Content-Length matches the exact byte count sent.
        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format: str, *args):  # noqa: A003
        # Suppress per-request access logging to keep drill output readable.
        return

    def do_GET(self):  # noqa: N802
        parsed = urlparse(self.path)
        if parsed.path.endswith("/api/v1/admin/ops/settings/metric-thresholds"):
            self._write_json({"code": 0, "message": "success", "data": THRESHOLDS})
            return
        if parsed.path.endswith("/api/v1/admin/ops/dashboard/overview"):
            params = parse_qs(parsed.query)
            # group_id selects the simulated gray stage; default to stage A.
            stage = (params.get("group_id") or ["A"])[0]
            snapshot = STAGE_SNAPSHOTS.get(stage, STAGE_SNAPSHOTS["A"])
            payload = {
                "code": 0,
                "message": "success",
                "data": {
                    "sla": snapshot["sla"],
                    "error_rate": snapshot["error_rate"],
                    "upstream_error_rate": snapshot["upstream_error_rate"],
                    "ttft": {"p99_ms": snapshot["ttft"]},
                },
            }
            self._write_json(payload)
            return
        # Anything outside the mocked surface gets a plain 404.
        self.send_response(404)
        self.end_headers()
def run_guard(base_url: str, stage: str) -> Tuple[int, str]:
    """Run the gray-guard script once for *stage* and capture its outcome.

    Args:
        base_url: Base URL of the (mock) Ops API the guard should query.
        stage: Gray batch identifier, forwarded as ``--group-id``.

    Returns:
        ``(exit_code, combined_output)`` where the output merges the guard
        process's stdout and stderr, stripped of surrounding whitespace.
    """
    cmd = [
        # Use the current interpreter rather than a bare "python": on many
        # systems (py3-only distros, venvs, Windows launchers) "python" is
        # absent from PATH and the drill would fail to spawn the guard.
        sys.executable,
        str(GUARD_SCRIPT),
        "--base-url",
        base_url,
        "--platform",
        "openai",
        "--time-range",
        "30m",
        "--group-id",
        stage,
    ]
    proc = subprocess.run(cmd, cwd=str(ROOT), capture_output=True, text=True)
    output = (proc.stdout + "\n" + proc.stderr).strip()
    return proc.returncode, output
def main() -> int:
    """Run the gray-release drill end to end and write the markdown report.

    Starts an in-process mock Ops API, runs the guard script once per gray
    batch (A/B/C/D) plus one injected-failure ("rollback") stage, then
    renders the results to REPORT_PATH.

    Returns:
        0 when every batch passes AND the rollback stage trips the guard
        (guard exit code 2); 1 otherwise.
    """
    # Port 0 lets the OS pick a free port, so concurrent drills never collide.
    server = HTTPServer(("127.0.0.1", 0), _MockHandler)
    host, port = server.server_address
    base_url = f"http://{host}:{port}"
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    lines = [
        "# OpenAI OAuth 灰度守护演练报告",
        "",
        "> 类型:本地 mock 演练(用于验证灰度守护与回滚触发机制)",
        # No placeholders here, so no f-string is needed.
        "> 生成脚本:`tools/perf/openai_oauth_gray_drill.py`",
        "",
        "## 1. 灰度批次结果6.1",
        "",
        "| 批次 | 流量比例 | 守护脚本退出码 | 结果 |",
        "|---|---:|---:|---|",
    ]
    batch_plan = [("A", "5%"), ("B", "20%"), ("C", "50%"), ("D", "100%")]
    all_pass = True
    for stage, ratio in batch_plan:
        code, _ = run_guard(base_url, stage)
        ok = code == 0
        all_pass = all_pass and ok
        lines.append(f"| {stage} | {ratio} | {code} | {'通过' if ok else '失败'} |")
    lines.extend([
        "",
        "## 2. 回滚触发演练6.2",
        "",
    ])
    # The "rollback" stage serves degraded metrics; the guard must exit 2.
    rollback_code, rollback_output = run_guard(base_url, "rollback")
    rollback_triggered = rollback_code == 2
    lines.append(f"- 注入异常场景退出码:`{rollback_code}`")
    # BUG FIX: both ternary branches were the empty string, so the report
    # never actually said whether rollback was triggered. Emit 是/否 (yes/no).
    lines.append(f"- 是否触发回滚条件:`{'是' if rollback_triggered else '否'}`")
    lines.append("- 关键信息摘录:")
    # Keep only the first 8 output lines — enough for a report excerpt.
    excerpt = "\n".join(rollback_output.splitlines()[:8])
    lines.append("```text")
    lines.append(excerpt)
    lines.append("```")
    lines.extend([
        "",
        "## 3. 验收结论6.3",
        "",
        f"- 批次灰度结果:`{'通过' if all_pass else '不通过'}`",
        f"- 回滚触发机制:`{'通过' if rollback_triggered else '不通过'}`",
        f"- 结论:`{'通过(可进入真实环境灰度)' if all_pass and rollback_triggered else '不通过(需修复后复测)'}`",
    ])
    REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)
    REPORT_PATH.write_text("\n".join(lines) + "\n", encoding="utf-8")
    server.shutdown()
    server.server_close()
    print(f"drill report generated: {REPORT_PATH}")
    return 0 if all_pass and rollback_triggered else 1
if __name__ == "__main__":
    # Propagate main()'s exit code to the shell.
    raise SystemExit(main())

View File

@@ -0,0 +1,213 @@
#!/usr/bin/env python3
"""OpenAI OAuth 灰度阈值守护脚本。
用途:
- 拉取 Ops 指标阈值配置与 Dashboard Overview 实时数据
- 对比 P99 TTFT / 错误率 / SLA
- 作为 6.2 灰度守护的自动化门禁(退出码可直接用于 CI/CD
退出码:
- 0: 指标通过
- 1: 请求失败/参数错误
- 2: 指标超阈值(建议停止扩量并回滚)
"""
from __future__ import annotations
import argparse
import json
import sys
import urllib.error
import urllib.parse
import urllib.request
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
@dataclass
class GuardThresholds:
    """Threshold limits fetched from the Ops metric-thresholds endpoint.

    Any field may be None when the backend does not configure that limit;
    evaluate() skips the corresponding check in that case.
    """

    # Minimum acceptable SLA, in percent.
    sla_percent_min: Optional[float]
    # Maximum acceptable P99 time-to-first-token, in milliseconds.
    ttft_p99_ms_max: Optional[float]
    # Maximum acceptable request error rate, in percent.
    request_error_rate_percent_max: Optional[float]
    # Maximum acceptable upstream error rate, in percent.
    upstream_error_rate_percent_max: Optional[float]
@dataclass
class GuardSnapshot:
    """Live metric snapshot parsed from the dashboard overview response.

    Fields are None when the overview payload omits or mangles the value
    (see to_float_or_none), which causes evaluate() to skip that check.
    """

    # Observed SLA, in percent.
    sla: Optional[float]
    # Observed P99 time-to-first-token, in milliseconds.
    ttft_p99_ms: Optional[float]
    # Observed request error rate, in percent ("error_rate" upstream field).
    request_error_rate_percent: Optional[float]
    # Observed upstream error rate, in percent.
    upstream_error_rate_percent: Optional[float]
def build_headers(token: str) -> Dict[str, str]:
    """Build request headers, attaching a Bearer token when one is provided."""
    cleaned = token.strip()
    headers: Dict[str, str] = {"Accept": "application/json"}
    if cleaned:
        headers["Authorization"] = f"Bearer {cleaned}"
    return headers
def request_json(url: str, headers: Dict[str, str]) -> Dict[str, Any]:
    """GET *url* and decode the JSON body; wrap transport errors in RuntimeError."""
    request = urllib.request.Request(url=url, method="GET", headers=headers)
    try:
        with urllib.request.urlopen(request, timeout=15) as response:
            body = response.read().decode("utf-8")
    except urllib.error.HTTPError as err:
        # Include the server's error body to make CI logs actionable.
        detail = err.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"HTTP {err.code}: {detail}") from err
    except urllib.error.URLError as err:
        raise RuntimeError(f"request failed: {err}") from err
    return json.loads(body)
def parse_envelope_data(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Unwrap the standard ``{code, message, data}`` envelope, validating shape.

    Raises RuntimeError when the payload is not a dict, signals a non-zero
    API code, or carries a non-dict ``data`` member.
    """
    if not isinstance(payload, dict):
        raise RuntimeError("invalid response payload")
    if payload.get("code") != 0:
        raise RuntimeError(f"api error: code={payload.get('code')} message={payload.get('message')}")
    inner = payload.get("data")
    if not isinstance(inner, dict):
        raise RuntimeError("invalid response data")
    return inner
def parse_thresholds(data: Dict[str, Any]) -> GuardThresholds:
    """Map raw threshold-config fields onto a GuardThresholds value object."""
    field_names = (
        "sla_percent_min",
        "ttft_p99_ms_max",
        "request_error_rate_percent_max",
        "upstream_error_rate_percent_max",
    )
    # Missing or non-numeric entries become None (check skipped downstream).
    return GuardThresholds(**{name: to_float_or_none(data.get(name)) for name in field_names})
def parse_snapshot(data: Dict[str, Any]) -> GuardSnapshot:
    """Map dashboard-overview fields onto a GuardSnapshot value object."""
    # TTFT percentiles live under a nested dict; tolerate its absence.
    ttft_block = data.get("ttft")
    if not isinstance(ttft_block, dict):
        ttft_block = {}
    return GuardSnapshot(
        sla=to_float_or_none(data.get("sla")),
        ttft_p99_ms=to_float_or_none(ttft_block.get("p99_ms")),
        request_error_rate_percent=to_float_or_none(data.get("error_rate")),
        upstream_error_rate_percent=to_float_or_none(data.get("upstream_error_rate")),
    )
def to_float_or_none(v: Any) -> Optional[float]:
    """Coerce *v* to float; return None for None or unconvertible values."""
    if v is None:
        return None
    try:
        converted = float(v)
    except (TypeError, ValueError):
        return None
    return converted
def evaluate(snapshot: GuardSnapshot, thresholds: GuardThresholds) -> List[str]:
    """Compare snapshot metrics against thresholds; return violation messages.

    A check is skipped when either the observed value or its threshold is
    None. An empty list means every configured check passed.
    """
    # (observed, limit, is_minimum, message template). is_minimum=True means
    # the metric must stay ABOVE the limit (SLA); otherwise below it.
    checks = [
        (
            snapshot.sla,
            thresholds.sla_percent_min,
            True,
            "SLA 低于阈值: actual={a:.2f}% threshold={t:.2f}%",
        ),
        (
            snapshot.ttft_p99_ms,
            thresholds.ttft_p99_ms_max,
            False,
            "TTFT P99 超阈值: actual={a:.2f}ms threshold={t:.2f}ms",
        ),
        (
            snapshot.request_error_rate_percent,
            thresholds.request_error_rate_percent_max,
            False,
            "请求错误率超阈值: actual={a:.2f}% threshold={t:.2f}%",
        ),
        (
            snapshot.upstream_error_rate_percent,
            thresholds.upstream_error_rate_percent_max,
            False,
            "上游错误率超阈值: actual={a:.2f}% threshold={t:.2f}%",
        ),
    ]
    violations: List[str] = []
    for observed, limit, is_minimum, template in checks:
        if observed is None or limit is None:
            continue
        breached = observed < limit if is_minimum else observed > limit
        if breached:
            violations.append(template.format(a=observed, t=limit))
    return violations
def main() -> int:
    """CLI entry point: fetch thresholds and a live snapshot, then gate.

    Exit codes (consumed directly by CI/CD pipelines):
        0 — all configured metrics pass.
        1 — request failure or bad parameters.
        2 — one or more metrics breach thresholds (stop rollout, roll back).
    """
    parser = argparse.ArgumentParser(description="OpenAI OAuth 灰度阈值守护")
    parser.add_argument("--base-url", required=True, help="服务地址,例如 http://127.0.0.1:5231")
    parser.add_argument("--admin-token", default="", help="Admin JWT可选按部署策略")
    parser.add_argument("--platform", default="openai", help="平台过滤,默认 openai")
    parser.add_argument("--time-range", default="30m", help="时间窗口: 5m/30m/1h/6h/24h/7d/30d")
    parser.add_argument("--group-id", default="", help="可选 group_id")
    args = parser.parse_args()
    base = args.base_url.rstrip("/")
    headers = build_headers(args.admin_token)
    try:
        # Step 1: pull configured metric thresholds from the Ops settings API.
        threshold_url = f"{base}/api/v1/admin/ops/settings/metric-thresholds"
        thresholds_raw = request_json(threshold_url, headers)
        thresholds = parse_thresholds(parse_envelope_data(thresholds_raw))
        # Step 2: pull the live overview, scoped by platform/time-range and
        # (optionally) the gray batch's group_id.
        query = {"platform": args.platform, "time_range": args.time_range}
        if args.group_id.strip():
            query["group_id"] = args.group_id.strip()
        overview_url = (
            f"{base}/api/v1/admin/ops/dashboard/overview?"
            + urllib.parse.urlencode(query)
        )
        overview_raw = request_json(overview_url, headers)
        snapshot = parse_snapshot(parse_envelope_data(overview_raw))
        # Echo both sides of the comparison so CI logs are auditable.
        print("[OpenAI OAuth Gray Guard] 当前快照:")
        print(
            json.dumps(
                {
                    "sla": snapshot.sla,
                    "ttft_p99_ms": snapshot.ttft_p99_ms,
                    "request_error_rate_percent": snapshot.request_error_rate_percent,
                    "upstream_error_rate_percent": snapshot.upstream_error_rate_percent,
                },
                ensure_ascii=False,
                indent=2,
            )
        )
        print("[OpenAI OAuth Gray Guard] 阈值配置:")
        print(
            json.dumps(
                {
                    "sla_percent_min": thresholds.sla_percent_min,
                    "ttft_p99_ms_max": thresholds.ttft_p99_ms_max,
                    "request_error_rate_percent_max": thresholds.request_error_rate_percent_max,
                    "upstream_error_rate_percent_max": thresholds.upstream_error_rate_percent_max,
                },
                ensure_ascii=False,
                indent=2,
            )
        )
        # Step 3: evaluate and translate violations into the gating exit code.
        violations = evaluate(snapshot, thresholds)
        if violations:
            print("[OpenAI OAuth Gray Guard] 检测到阈值违例:")
            for idx, line in enumerate(violations, start=1):
                print(f"  {idx}. {line}")
            print("[OpenAI OAuth Gray Guard] 建议:停止扩量并执行回滚。")
            return 2
        print("[OpenAI OAuth Gray Guard] 指标通过,可继续观察或按计划扩量。")
        return 0
    except Exception as exc:
        # Top-level CLI boundary: report the failure and exit 1 so pipelines
        # can distinguish "could not check" from "checked and failed".
        print(f"[OpenAI OAuth Gray Guard] 执行失败: {exc}", file=sys.stderr)
        return 1
if __name__ == "__main__":
    # Exit with main()'s code so CI/CD can gate on it directly.
    raise SystemExit(main())

View File

@@ -0,0 +1,122 @@
import http from 'k6/http';
import { check } from 'k6';
import { Rate, Trend } from 'k6/metrics';
// Target service and request settings, all overridable via k6 env vars.
const baseURL = __ENV.BASE_URL || 'http://127.0.0.1:5231';
const apiKey = __ENV.API_KEY || '';
const model = __ENV.MODEL || 'gpt-5';
const timeout = __ENV.TIMEOUT || '180s';
// Arrival rates (req/s) and run length for the two scenarios below.
const nonStreamRPS = Number(__ENV.NON_STREAM_RPS || 8);
const streamRPS = Number(__ENV.STREAM_RPS || 4);
const duration = __ENV.DURATION || '3m';
const preAllocatedVUs = Number(__ENV.PRE_ALLOCATED_VUS || 30);
const maxVUs = Number(__ENV.MAX_VUS || 200);
// Custom metrics; Trend(_, true) marks the series as a time value.
const reqDurationMs = new Trend('openai_oauth_req_duration_ms', true);
const ttftMs = new Trend('openai_oauth_ttft_ms', true);
const non2xxRate = new Rate('openai_oauth_non2xx_rate');
const streamDoneRate = new Rate('openai_oauth_stream_done_rate');
export const options = {
  // Two constant-arrival-rate scenarios run in parallel against /v1/responses.
  scenarios: {
    non_stream: {
      executor: 'constant-arrival-rate',
      rate: nonStreamRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs,
      maxVUs,
      exec: 'runNonStream',
      tags: { request_type: 'non_stream' },
    },
    stream: {
      executor: 'constant-arrival-rate',
      rate: streamRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs,
      maxVUs,
      exec: 'runStream',
      tags: { request_type: 'stream' },
    },
  },
  // Pass/fail gates k6 evaluates at the end of the run.
  thresholds: {
    openai_oauth_non2xx_rate: ['rate<0.01'],
    openai_oauth_req_duration_ms: ['p(95)<3000', 'p(99)<6000'],
    openai_oauth_ttft_ms: ['p(99)<1200'],
    openai_oauth_stream_done_rate: ['rate>0.99'],
  },
};
// Build request headers; attach Authorization only when an API key is set.
// The User-Agent mimics the codex CLI client.
function buildHeaders() {
  const base = {
    'Content-Type': 'application/json',
    'User-Agent': 'codex_cli_rs/0.1.0',
  };
  return apiKey ? Object.assign(base, { Authorization: `Bearer ${apiKey}` }) : base;
}
// Minimal /v1/responses payload: a single short user turn, capped output.
function buildBody(stream) {
  const userTurn = {
    role: 'user',
    content: [
      {
        type: 'input_text',
        text: '请返回一句极短的话pong',
      },
    ],
  };
  const payload = {
    model,
    stream,
    input: [userTurn],
    max_output_tokens: 32,
  };
  return JSON.stringify(payload);
}
// Fold one response into the custom metric series, tagged by request type
// so stream / non-stream distributions can be split in summaries.
function recordMetrics(res, stream) {
  const tags = { request_type: stream ? 'stream' : 'non_stream' };
  reqDurationMs.add(res.timings.duration, tags);
  // timings.waiting approximates time-to-first-byte, used here as TTFT.
  ttftMs.add(res.timings.waiting, tags);
  non2xxRate.add(res.status < 200 || res.status >= 300, tags);
  if (stream) {
    // A complete SSE stream must contain the [DONE] sentinel.
    const done = !!res.body && res.body.indexOf('[DONE]') >= 0;
    streamDoneRate.add(done, { request_type: 'stream' });
  }
}
// POST one request to /v1/responses, check the status, and record metrics.
function postResponses(stream) {
  const requestType = stream ? 'stream' : 'non_stream';
  const res = http.post(`${baseURL}/v1/responses`, buildBody(stream), {
    headers: buildHeaders(),
    timeout,
    tags: { endpoint: '/v1/responses', request_type: requestType },
  });
  check(res, { 'status is 2xx': (r) => r.status >= 200 && r.status < 300 });
  recordMetrics(res, stream);
  return res;
}
// k6 scenario entry: one non-streaming /v1/responses request per iteration.
export function runNonStream() {
  postResponses(false);
}
// k6 scenario entry: one streaming /v1/responses request per iteration.
export function runStream() {
  postResponses(true);
}
// End-of-run hook: print a console recap and persist the full summary JSON
// artifact under docs/perf/ for later analysis.
export function handleSummary(data) {
  const metricsDump = JSON.stringify(data.metrics, null, 2);
  const outputs = {};
  outputs.stdout = `\nOpenAI OAuth /v1/responses 基线完成\n${metricsDump}\n`;
  outputs['docs/perf/openai-oauth-k6-summary.json'] = JSON.stringify(data, null, 2);
  return outputs;
}