feat(openai): 极致优化 OAuth 链路并补齐性能守护

- 优化 /v1/responses 热路径,减少重复解析与不必要拷贝
- 优化并发与 token 竞争路径并补齐运行指标
- 补充 OpenAI/Ops 相关单元测试与回归用例
- 新增灰度阈值守护与压测脚本,支撑发布验收
This commit is contained in:
yangjianbo
2026-02-12 09:41:37 +08:00
parent a88bb8684f
commit 61a2bf469a
16 changed files with 1519 additions and 135 deletions

View File

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""OpenAI OAuth 灰度发布演练脚本(本地模拟)。
该脚本会启动本地 mock Ops API调用 openai_oauth_gray_guard.py
验证以下场景:
1) A/B/C/D 四个灰度批次均通过
2) 注入异常场景触发阈值告警并返回退出码 2模拟自动回滚触发
"""
from __future__ import annotations

import json
import subprocess
import sys
import threading
from dataclasses import dataclass
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from typing import Dict, Tuple
from urllib.parse import parse_qs, urlparse
# Repository root: two directories above this script (tools/perf/ -> repo root).
ROOT = Path(__file__).resolve().parents[2]
# The gray-guard script that this drill exercises as a subprocess.
GUARD_SCRIPT = ROOT / "tools" / "perf" / "openai_oauth_gray_guard.py"
# Destination of the generated markdown drill report.
REPORT_PATH = ROOT / "docs" / "perf" / "openai-oauth-gray-drill-report.md"
# Threshold config served by the mock Ops API; keys mirror the real
# metric-thresholds endpoint that the guard script consumes.
THRESHOLDS = {
    "sla_percent_min": 99.5,
    "ttft_p99_ms_max": 900,
    "request_error_rate_percent_max": 2.0,
    "upstream_error_rate_percent_max": 2.0,
}
# Per-stage dashboard snapshots served by the mock. Stages A-D stay within
# every threshold above; "rollback" deliberately violates all four limits
# so the guard must exit with code 2 (rollback trigger).
STAGE_SNAPSHOTS: Dict[str, Dict[str, float]] = {
    "A": {"sla": 99.78, "ttft": 780, "error_rate": 1.20, "upstream_error_rate": 1.05},
    "B": {"sla": 99.82, "ttft": 730, "error_rate": 1.05, "upstream_error_rate": 0.92},
    "C": {"sla": 99.86, "ttft": 680, "error_rate": 0.88, "upstream_error_rate": 0.80},
    "D": {"sla": 99.89, "ttft": 640, "error_rate": 0.72, "upstream_error_rate": 0.67},
    "rollback": {"sla": 97.10, "ttft": 1550, "error_rate": 6.30, "upstream_error_rate": 5.60},
}
class _MockHandler(BaseHTTPRequestHandler):
    """Mock Ops API handler: serves threshold config and per-stage overview data."""

    def _write_json(self, payload: dict) -> None:
        # Serialize once so Content-Length matches the exact byte count sent.
        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format: str, *args):  # noqa: A003
        # Suppress per-request access logging to keep drill output readable.
        return

    def do_GET(self):  # noqa: N802
        parsed = urlparse(self.path)
        if parsed.path.endswith("/api/v1/admin/ops/settings/metric-thresholds"):
            self._write_json({"code": 0, "message": "success", "data": THRESHOLDS})
            return
        if parsed.path.endswith("/api/v1/admin/ops/dashboard/overview"):
            params = parse_qs(parsed.query)
            # group_id selects the simulated gray stage; default to stage A.
            stage = (params.get("group_id") or ["A"])[0]
            snapshot = STAGE_SNAPSHOTS.get(stage, STAGE_SNAPSHOTS["A"])
            payload = {
                "code": 0,
                "message": "success",
                "data": {
                    "sla": snapshot["sla"],
                    "error_rate": snapshot["error_rate"],
                    "upstream_error_rate": snapshot["upstream_error_rate"],
                    "ttft": {"p99_ms": snapshot["ttft"]},
                },
            }
            self._write_json(payload)
            return
        # Anything outside the mocked surface gets a plain 404.
        self.send_response(404)
        self.end_headers()
def run_guard(base_url: str, stage: str) -> Tuple[int, str]:
    """Run the gray-guard script once for *stage* and capture its outcome.

    Args:
        base_url: Base URL of the (mock) Ops API the guard should query.
        stage: Gray batch identifier, forwarded as ``--group-id``.

    Returns:
        ``(exit_code, combined_output)`` where the output merges the guard
        process's stdout and stderr, stripped of surrounding whitespace.
    """
    cmd = [
        # Use the current interpreter rather than a bare "python": on many
        # systems (py3-only distros, venvs, Windows launchers) "python" is
        # absent from PATH and the drill would fail to spawn the guard.
        sys.executable,
        str(GUARD_SCRIPT),
        "--base-url",
        base_url,
        "--platform",
        "openai",
        "--time-range",
        "30m",
        "--group-id",
        stage,
    ]
    proc = subprocess.run(cmd, cwd=str(ROOT), capture_output=True, text=True)
    output = (proc.stdout + "\n" + proc.stderr).strip()
    return proc.returncode, output
def main() -> int:
    """Run the gray-release drill end to end and write the markdown report.

    Starts an in-process mock Ops API, runs the guard script once per gray
    batch (A/B/C/D) plus one injected-failure ("rollback") stage, then
    renders the results to REPORT_PATH.

    Returns:
        0 when every batch passes AND the rollback stage trips the guard
        (guard exit code 2); 1 otherwise.
    """
    # Port 0 lets the OS pick a free port, so concurrent drills never collide.
    server = HTTPServer(("127.0.0.1", 0), _MockHandler)
    host, port = server.server_address
    base_url = f"http://{host}:{port}"
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    lines = [
        "# OpenAI OAuth 灰度守护演练报告",
        "",
        "> 类型:本地 mock 演练(用于验证灰度守护与回滚触发机制)",
        # No placeholders here, so no f-string is needed.
        "> 生成脚本:`tools/perf/openai_oauth_gray_drill.py`",
        "",
        "## 1. 灰度批次结果6.1",
        "",
        "| 批次 | 流量比例 | 守护脚本退出码 | 结果 |",
        "|---|---:|---:|---|",
    ]
    batch_plan = [("A", "5%"), ("B", "20%"), ("C", "50%"), ("D", "100%")]
    all_pass = True
    for stage, ratio in batch_plan:
        code, _ = run_guard(base_url, stage)
        ok = code == 0
        all_pass = all_pass and ok
        lines.append(f"| {stage} | {ratio} | {code} | {'通过' if ok else '失败'} |")
    lines.extend([
        "",
        "## 2. 回滚触发演练6.2",
        "",
    ])
    # The "rollback" stage serves degraded metrics; the guard must exit 2.
    rollback_code, rollback_output = run_guard(base_url, "rollback")
    rollback_triggered = rollback_code == 2
    lines.append(f"- 注入异常场景退出码:`{rollback_code}`")
    # BUG FIX: both ternary branches were the empty string, so the report
    # never actually said whether rollback was triggered. Emit 是/否 (yes/no).
    lines.append(f"- 是否触发回滚条件:`{'是' if rollback_triggered else '否'}`")
    lines.append("- 关键信息摘录:")
    # Keep only the first 8 output lines — enough for a report excerpt.
    excerpt = "\n".join(rollback_output.splitlines()[:8])
    lines.append("```text")
    lines.append(excerpt)
    lines.append("```")
    lines.extend([
        "",
        "## 3. 验收结论6.3",
        "",
        f"- 批次灰度结果:`{'通过' if all_pass else '不通过'}`",
        f"- 回滚触发机制:`{'通过' if rollback_triggered else '不通过'}`",
        f"- 结论:`{'通过(可进入真实环境灰度)' if all_pass and rollback_triggered else '不通过(需修复后复测)'}`",
    ])
    REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)
    REPORT_PATH.write_text("\n".join(lines) + "\n", encoding="utf-8")
    server.shutdown()
    server.server_close()
    print(f"drill report generated: {REPORT_PATH}")
    return 0 if all_pass and rollback_triggered else 1
if __name__ == "__main__":
    # Propagate main()'s exit code to the shell.
    raise SystemExit(main())

View File

@@ -0,0 +1,213 @@
#!/usr/bin/env python3
"""OpenAI OAuth 灰度阈值守护脚本。
用途:
- 拉取 Ops 指标阈值配置与 Dashboard Overview 实时数据
- 对比 P99 TTFT / 错误率 / SLA
- 作为 6.2 灰度守护的自动化门禁(退出码可直接用于 CI/CD
退出码:
- 0: 指标通过
- 1: 请求失败/参数错误
- 2: 指标超阈值(建议停止扩量并回滚)
"""
from __future__ import annotations
import argparse
import json
import sys
import urllib.error
import urllib.parse
import urllib.request
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
@dataclass
class GuardThresholds:
    """Threshold limits fetched from the Ops metric-thresholds endpoint.

    Any field may be None when the backend does not configure that limit;
    evaluate() skips the corresponding check in that case.
    """

    # Minimum acceptable SLA, in percent.
    sla_percent_min: Optional[float]
    # Maximum acceptable P99 time-to-first-token, in milliseconds.
    ttft_p99_ms_max: Optional[float]
    # Maximum acceptable request error rate, in percent.
    request_error_rate_percent_max: Optional[float]
    # Maximum acceptable upstream error rate, in percent.
    upstream_error_rate_percent_max: Optional[float]
@dataclass
class GuardSnapshot:
    """Live metric snapshot parsed from the dashboard overview response.

    Fields are None when the overview payload omits or mangles the value
    (see to_float_or_none), which causes evaluate() to skip that check.
    """

    # Observed SLA, in percent.
    sla: Optional[float]
    # Observed P99 time-to-first-token, in milliseconds.
    ttft_p99_ms: Optional[float]
    # Observed request error rate, in percent ("error_rate" upstream field).
    request_error_rate_percent: Optional[float]
    # Observed upstream error rate, in percent.
    upstream_error_rate_percent: Optional[float]
def build_headers(token: str) -> Dict[str, str]:
    """Build request headers, attaching a Bearer token when one is provided."""
    cleaned = token.strip()
    headers: Dict[str, str] = {"Accept": "application/json"}
    if cleaned:
        headers["Authorization"] = f"Bearer {cleaned}"
    return headers
def request_json(url: str, headers: Dict[str, str]) -> Dict[str, Any]:
    """GET *url* and decode the JSON body; wrap transport errors in RuntimeError."""
    request = urllib.request.Request(url=url, method="GET", headers=headers)
    try:
        with urllib.request.urlopen(request, timeout=15) as response:
            body = response.read().decode("utf-8")
    except urllib.error.HTTPError as err:
        # Include the server's error body to make CI logs actionable.
        detail = err.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"HTTP {err.code}: {detail}") from err
    except urllib.error.URLError as err:
        raise RuntimeError(f"request failed: {err}") from err
    return json.loads(body)
def parse_envelope_data(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Unwrap the standard ``{code, message, data}`` envelope, validating shape.

    Raises RuntimeError when the payload is not a dict, signals a non-zero
    API code, or carries a non-dict ``data`` member.
    """
    if not isinstance(payload, dict):
        raise RuntimeError("invalid response payload")
    if payload.get("code") != 0:
        raise RuntimeError(f"api error: code={payload.get('code')} message={payload.get('message')}")
    inner = payload.get("data")
    if not isinstance(inner, dict):
        raise RuntimeError("invalid response data")
    return inner
def parse_thresholds(data: Dict[str, Any]) -> GuardThresholds:
    """Map raw threshold-config fields onto a GuardThresholds value object."""
    field_names = (
        "sla_percent_min",
        "ttft_p99_ms_max",
        "request_error_rate_percent_max",
        "upstream_error_rate_percent_max",
    )
    # Missing or non-numeric entries become None (check skipped downstream).
    return GuardThresholds(**{name: to_float_or_none(data.get(name)) for name in field_names})
def parse_snapshot(data: Dict[str, Any]) -> GuardSnapshot:
    """Map dashboard-overview fields onto a GuardSnapshot value object."""
    # TTFT percentiles live under a nested dict; tolerate its absence.
    ttft_block = data.get("ttft")
    if not isinstance(ttft_block, dict):
        ttft_block = {}
    return GuardSnapshot(
        sla=to_float_or_none(data.get("sla")),
        ttft_p99_ms=to_float_or_none(ttft_block.get("p99_ms")),
        request_error_rate_percent=to_float_or_none(data.get("error_rate")),
        upstream_error_rate_percent=to_float_or_none(data.get("upstream_error_rate")),
    )
def to_float_or_none(v: Any) -> Optional[float]:
    """Coerce *v* to float; return None for None or unconvertible values."""
    if v is None:
        return None
    try:
        converted = float(v)
    except (TypeError, ValueError):
        return None
    return converted
def evaluate(snapshot: GuardSnapshot, thresholds: GuardThresholds) -> List[str]:
    """Compare snapshot metrics against thresholds; return violation messages.

    A check is skipped when either the observed value or its threshold is
    None. An empty list means every configured check passed.
    """
    # (observed, limit, is_minimum, message template). is_minimum=True means
    # the metric must stay ABOVE the limit (SLA); otherwise below it.
    checks = [
        (
            snapshot.sla,
            thresholds.sla_percent_min,
            True,
            "SLA 低于阈值: actual={a:.2f}% threshold={t:.2f}%",
        ),
        (
            snapshot.ttft_p99_ms,
            thresholds.ttft_p99_ms_max,
            False,
            "TTFT P99 超阈值: actual={a:.2f}ms threshold={t:.2f}ms",
        ),
        (
            snapshot.request_error_rate_percent,
            thresholds.request_error_rate_percent_max,
            False,
            "请求错误率超阈值: actual={a:.2f}% threshold={t:.2f}%",
        ),
        (
            snapshot.upstream_error_rate_percent,
            thresholds.upstream_error_rate_percent_max,
            False,
            "上游错误率超阈值: actual={a:.2f}% threshold={t:.2f}%",
        ),
    ]
    violations: List[str] = []
    for observed, limit, is_minimum, template in checks:
        if observed is None or limit is None:
            continue
        breached = observed < limit if is_minimum else observed > limit
        if breached:
            violations.append(template.format(a=observed, t=limit))
    return violations
def main() -> int:
    """CLI entry point: fetch thresholds and a live snapshot, then gate.

    Exit codes (consumed directly by CI/CD pipelines):
        0 — all configured metrics pass.
        1 — request failure or bad parameters.
        2 — one or more metrics breach thresholds (stop rollout, roll back).
    """
    parser = argparse.ArgumentParser(description="OpenAI OAuth 灰度阈值守护")
    parser.add_argument("--base-url", required=True, help="服务地址,例如 http://127.0.0.1:5231")
    parser.add_argument("--admin-token", default="", help="Admin JWT可选按部署策略")
    parser.add_argument("--platform", default="openai", help="平台过滤,默认 openai")
    parser.add_argument("--time-range", default="30m", help="时间窗口: 5m/30m/1h/6h/24h/7d/30d")
    parser.add_argument("--group-id", default="", help="可选 group_id")
    args = parser.parse_args()
    base = args.base_url.rstrip("/")
    headers = build_headers(args.admin_token)
    try:
        # Step 1: pull configured metric thresholds from the Ops settings API.
        threshold_url = f"{base}/api/v1/admin/ops/settings/metric-thresholds"
        thresholds_raw = request_json(threshold_url, headers)
        thresholds = parse_thresholds(parse_envelope_data(thresholds_raw))
        # Step 2: pull the live overview, scoped by platform/time-range and
        # (optionally) the gray batch's group_id.
        query = {"platform": args.platform, "time_range": args.time_range}
        if args.group_id.strip():
            query["group_id"] = args.group_id.strip()
        overview_url = (
            f"{base}/api/v1/admin/ops/dashboard/overview?"
            + urllib.parse.urlencode(query)
        )
        overview_raw = request_json(overview_url, headers)
        snapshot = parse_snapshot(parse_envelope_data(overview_raw))
        # Echo both sides of the comparison so CI logs are auditable.
        print("[OpenAI OAuth Gray Guard] 当前快照:")
        print(
            json.dumps(
                {
                    "sla": snapshot.sla,
                    "ttft_p99_ms": snapshot.ttft_p99_ms,
                    "request_error_rate_percent": snapshot.request_error_rate_percent,
                    "upstream_error_rate_percent": snapshot.upstream_error_rate_percent,
                },
                ensure_ascii=False,
                indent=2,
            )
        )
        print("[OpenAI OAuth Gray Guard] 阈值配置:")
        print(
            json.dumps(
                {
                    "sla_percent_min": thresholds.sla_percent_min,
                    "ttft_p99_ms_max": thresholds.ttft_p99_ms_max,
                    "request_error_rate_percent_max": thresholds.request_error_rate_percent_max,
                    "upstream_error_rate_percent_max": thresholds.upstream_error_rate_percent_max,
                },
                ensure_ascii=False,
                indent=2,
            )
        )
        # Step 3: evaluate and translate violations into the gating exit code.
        violations = evaluate(snapshot, thresholds)
        if violations:
            print("[OpenAI OAuth Gray Guard] 检测到阈值违例:")
            for idx, line in enumerate(violations, start=1):
                print(f"  {idx}. {line}")
            print("[OpenAI OAuth Gray Guard] 建议:停止扩量并执行回滚。")
            return 2
        print("[OpenAI OAuth Gray Guard] 指标通过,可继续观察或按计划扩量。")
        return 0
    except Exception as exc:
        # Top-level CLI boundary: report the failure and exit 1 so pipelines
        # can distinguish "could not check" from "checked and failed".
        print(f"[OpenAI OAuth Gray Guard] 执行失败: {exc}", file=sys.stderr)
        return 1
if __name__ == "__main__":
    # Exit with main()'s code so CI/CD can gate on it directly.
    raise SystemExit(main())

View File

@@ -0,0 +1,122 @@
import http from 'k6/http';
import { check } from 'k6';
import { Rate, Trend } from 'k6/metrics';
// Target service and request settings, all overridable via k6 env vars.
const baseURL = __ENV.BASE_URL || 'http://127.0.0.1:5231';
const apiKey = __ENV.API_KEY || '';
const model = __ENV.MODEL || 'gpt-5';
const timeout = __ENV.TIMEOUT || '180s';
// Arrival rates (req/s) and run length for the two scenarios below.
const nonStreamRPS = Number(__ENV.NON_STREAM_RPS || 8);
const streamRPS = Number(__ENV.STREAM_RPS || 4);
const duration = __ENV.DURATION || '3m';
const preAllocatedVUs = Number(__ENV.PRE_ALLOCATED_VUS || 30);
const maxVUs = Number(__ENV.MAX_VUS || 200);
// Custom metrics; Trend(_, true) marks the series as a time value.
const reqDurationMs = new Trend('openai_oauth_req_duration_ms', true);
const ttftMs = new Trend('openai_oauth_ttft_ms', true);
const non2xxRate = new Rate('openai_oauth_non2xx_rate');
const streamDoneRate = new Rate('openai_oauth_stream_done_rate');
export const options = {
  // Two constant-arrival-rate scenarios run in parallel against /v1/responses.
  scenarios: {
    non_stream: {
      executor: 'constant-arrival-rate',
      rate: nonStreamRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs,
      maxVUs,
      exec: 'runNonStream',
      tags: { request_type: 'non_stream' },
    },
    stream: {
      executor: 'constant-arrival-rate',
      rate: streamRPS,
      timeUnit: '1s',
      duration,
      preAllocatedVUs,
      maxVUs,
      exec: 'runStream',
      tags: { request_type: 'stream' },
    },
  },
  // Pass/fail gates k6 evaluates at the end of the run.
  thresholds: {
    openai_oauth_non2xx_rate: ['rate<0.01'],
    openai_oauth_req_duration_ms: ['p(95)<3000', 'p(99)<6000'],
    openai_oauth_ttft_ms: ['p(99)<1200'],
    openai_oauth_stream_done_rate: ['rate>0.99'],
  },
};
// Build request headers; attach Authorization only when an API key is set.
// The User-Agent mimics the codex CLI client.
function buildHeaders() {
  const base = {
    'Content-Type': 'application/json',
    'User-Agent': 'codex_cli_rs/0.1.0',
  };
  return apiKey ? Object.assign(base, { Authorization: `Bearer ${apiKey}` }) : base;
}
// Minimal /v1/responses payload: a single short user turn, capped output.
function buildBody(stream) {
  const userTurn = {
    role: 'user',
    content: [
      {
        type: 'input_text',
        text: '请返回一句极短的话pong',
      },
    ],
  };
  const payload = {
    model,
    stream,
    input: [userTurn],
    max_output_tokens: 32,
  };
  return JSON.stringify(payload);
}
// Fold one response into the custom metric series, tagged by request type
// so stream / non-stream distributions can be split in summaries.
function recordMetrics(res, stream) {
  const tags = { request_type: stream ? 'stream' : 'non_stream' };
  reqDurationMs.add(res.timings.duration, tags);
  // timings.waiting approximates time-to-first-byte, used here as TTFT.
  ttftMs.add(res.timings.waiting, tags);
  non2xxRate.add(res.status < 200 || res.status >= 300, tags);
  if (stream) {
    // A complete SSE stream must contain the [DONE] sentinel.
    const done = !!res.body && res.body.indexOf('[DONE]') >= 0;
    streamDoneRate.add(done, { request_type: 'stream' });
  }
}
// POST one request to /v1/responses, check the status, and record metrics.
function postResponses(stream) {
  const requestType = stream ? 'stream' : 'non_stream';
  const res = http.post(`${baseURL}/v1/responses`, buildBody(stream), {
    headers: buildHeaders(),
    timeout,
    tags: { endpoint: '/v1/responses', request_type: requestType },
  });
  check(res, { 'status is 2xx': (r) => r.status >= 200 && r.status < 300 });
  recordMetrics(res, stream);
  return res;
}
// k6 scenario entry: one non-streaming /v1/responses request per iteration.
export function runNonStream() {
  postResponses(false);
}
// k6 scenario entry: one streaming /v1/responses request per iteration.
export function runStream() {
  postResponses(true);
}
// End-of-run hook: print a console recap and persist the full summary JSON
// artifact under docs/perf/ for later analysis.
export function handleSummary(data) {
  const metricsDump = JSON.stringify(data.metrics, null, 2);
  const outputs = {};
  outputs.stdout = `\nOpenAI OAuth /v1/responses 基线完成\n${metricsDump}\n`;
  outputs['docs/perf/openai-oauth-k6-summary.json'] = JSON.stringify(data, null, 2);
  return outputs;
}