Files
sub2api/tools/perf/openai_oauth_gray_guard.py
yangjianbo 61a2bf469a feat(openai): 极致优化 OAuth 链路并补齐性能守护
- 优化 /v1/responses 热路径,减少重复解析与不必要拷贝\n- 优化并发与 token 竞争路径并补齐运行指标\n- 补充 OpenAI/Ops 相关单元测试与回归用例\n- 新增灰度阈值守护与压测脚本,支撑发布验收
2026-02-12 09:41:37 +08:00

214 lines
7.7 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""OpenAI OAuth 灰度阈值守护脚本。
用途:
- 拉取 Ops 指标阈值配置与 Dashboard Overview 实时数据
- 对比 P99 TTFT / 错误率 / SLA
- 作为 6.2 灰度守护的自动化门禁(退出码可直接用于 CI/CD
退出码:
- 0: 指标通过
- 1: 请求失败/参数错误
- 2: 指标超阈值(建议停止扩量并回滚)
"""
from __future__ import annotations
import argparse
import json
import sys
import urllib.error
import urllib.parse
import urllib.request
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
@dataclass
class GuardThresholds:
    """Threshold configuration the guard compares live metrics against.

    Every field is optional; ``None`` means the threshold is not configured
    (or could not be parsed as a number).
    """

    # Lower bound for the SLA, in percent.
    sla_percent_min: Optional[float]
    # Upper bound for the P99 time-to-first-token, in milliseconds.
    ttft_p99_ms_max: Optional[float]
    # Upper bound for the request error rate, in percent.
    request_error_rate_percent_max: Optional[float]
    # Upper bound for the upstream error rate, in percent.
    upstream_error_rate_percent_max: Optional[float]
@dataclass
class GuardSnapshot:
    """Live metric values extracted from the dashboard overview response.

    Every field is optional; ``None`` means the value was absent from the
    response or could not be parsed as a number.
    """

    # Current SLA, in percent.
    sla: Optional[float]
    # Current P99 time-to-first-token, in milliseconds.
    ttft_p99_ms: Optional[float]
    # Current request error rate, in percent.
    request_error_rate_percent: Optional[float]
    # Current upstream error rate, in percent.
    upstream_error_rate_percent: Optional[float]
def build_headers(token: str) -> Dict[str, str]:
    """Build the HTTP request headers for the admin API.

    Args:
        token: Admin token; surrounding whitespace is ignored.

    Returns:
        A dict that always asks for JSON and carries a ``Bearer``
        ``Authorization`` header only when the stripped token is non-empty.
    """
    bearer = token.strip()
    if not bearer:
        return {"Accept": "application/json"}
    return {"Accept": "application/json", "Authorization": f"Bearer {bearer}"}
def request_json(url: str, headers: Dict[str, str]) -> Dict[str, Any]:
    """Issue a GET request and return the decoded JSON body.

    Args:
        url: Absolute URL to fetch.
        headers: HTTP headers sent with the request.

    Returns:
        The value produced by ``json.loads`` on the UTF-8 decoded body.

    Raises:
        RuntimeError: On an HTTP error status (the message carries the status
            code and response body), on any connection/read failure including
            timeouts, and when the body is not valid UTF-8 or not valid JSON.
    """
    req = urllib.request.Request(url=url, method="GET", headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            body_bytes = resp.read()
    except urllib.error.HTTPError as e:
        body = e.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"HTTP {e.code}: {body}") from e
    except OSError as e:
        # URLError is an OSError subclass; catching OSError additionally covers
        # failures raised while reading the body (e.g. a read timeout), which
        # urlopen does not wrap in URLError.
        raise RuntimeError(f"request failed: {e}") from e

    try:
        return json.loads(body_bytes.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as e:
        # A proxy error page or truncated body would otherwise surface as a
        # bare "Expecting value ..." message with no hint about its origin.
        raise RuntimeError(f"invalid JSON response: {e}") from e
def parse_envelope_data(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Unwrap the ``data`` object from an API response envelope.

    Args:
        payload: Decoded JSON response; expected to be a dict with ``code``,
            ``message`` and ``data`` entries.

    Returns:
        The dict stored under ``data``.

    Raises:
        RuntimeError: If the payload is not a dict, if ``code`` is not 0, or
            if ``data`` is not a dict.
    """
    if not isinstance(payload, dict):
        raise RuntimeError("invalid response payload")

    status = payload.get("code")
    if status != 0:
        raise RuntimeError(f"api error: code={status} message={payload.get('message')}")

    inner = payload.get("data")
    if isinstance(inner, dict):
        return inner
    raise RuntimeError("invalid response data")
def parse_thresholds(data: Dict[str, Any]) -> GuardThresholds:
    """Build a ``GuardThresholds`` from the metric-thresholds response data.

    Each threshold is read from the key of the same name and converted with
    ``to_float_or_none``, so missing or non-numeric entries become ``None``.
    """
    keys = (
        "sla_percent_min",
        "ttft_p99_ms_max",
        "request_error_rate_percent_max",
        "upstream_error_rate_percent_max",
    )
    # The response keys match the dataclass field names one-to-one.
    return GuardThresholds(**{key: to_float_or_none(data.get(key)) for key in keys})
def parse_snapshot(data: Dict[str, Any]) -> GuardSnapshot:
    """Build a ``GuardSnapshot`` from the dashboard overview response data.

    The P99 TTFT is read from the nested ``ttft`` object (treated as empty
    when it is missing or not a dict); the other metrics are read from the
    top level. Missing or non-numeric values become ``None``.
    """
    ttft_section = data.get("ttft")
    if not isinstance(ttft_section, dict):
        ttft_section = {}

    sla = to_float_or_none(data.get("sla"))
    ttft_p99 = to_float_or_none(ttft_section.get("p99_ms"))
    request_errors = to_float_or_none(data.get("error_rate"))
    upstream_errors = to_float_or_none(data.get("upstream_error_rate"))

    return GuardSnapshot(
        sla=sla,
        ttft_p99_ms=ttft_p99,
        request_error_rate_percent=request_errors,
        upstream_error_rate_percent=upstream_errors,
    )
def to_float_or_none(v: Any) -> Optional[float]:
    """Convert ``v`` to ``float``, returning ``None`` when that is not possible.

    ``None`` maps to ``None``; values that make ``float()`` raise
    ``TypeError`` or ``ValueError`` also map to ``None``.
    """
    try:
        return None if v is None else float(v)
    except (TypeError, ValueError):
        return None
def evaluate(snapshot: GuardSnapshot, thresholds: GuardThresholds) -> List[str]:
    """Compare the live snapshot with the thresholds and list the violations.

    A metric is only checked when both its threshold and its current value
    are present; otherwise it is skipped. SLA is violated when it falls
    below its minimum, the other metrics when they rise above their maximum.
    Values exactly equal to the threshold are not violations.

    Returns:
        Human-readable violation messages in the fixed order SLA, TTFT P99,
        request error rate, upstream error rate; empty when nothing is
        violated.
    """
    # (label, observed value, limit, unit suffix, True when the limit is a floor)
    rules = (
        ("SLA 低于阈值", snapshot.sla, thresholds.sla_percent_min, "%", True),
        ("TTFT P99 超阈值", snapshot.ttft_p99_ms, thresholds.ttft_p99_ms_max, "ms", False),
        (
            "请求错误率超阈值",
            snapshot.request_error_rate_percent,
            thresholds.request_error_rate_percent_max,
            "%",
            False,
        ),
        (
            "上游错误率超阈值",
            snapshot.upstream_error_rate_percent,
            thresholds.upstream_error_rate_percent_max,
            "%",
            False,
        ),
    )

    violations: List[str] = []
    for label, observed, limit, unit, limit_is_floor in rules:
        if limit is None or observed is None:
            continue
        breached = observed < limit if limit_is_floor else observed > limit
        if breached:
            violations.append(
                f"{label}: actual={observed:.2f}{unit} threshold={limit:.2f}{unit}"
            )
    return violations
def main(argv: Optional[List[str]] = None) -> int:
    """Run the gray-release guard and return the process exit code.

    Fetches the metric thresholds and the dashboard overview from the admin
    API, prints both as JSON, and evaluates the snapshot against the
    thresholds.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.

    Returns:
        0 when no threshold is violated (also after ``--help``), 1 when the
        arguments are invalid or any request/parsing step fails, 2 when at
        least one metric violates its threshold.
    """
    parser = argparse.ArgumentParser(description="OpenAI OAuth 灰度阈值守护")
    parser.add_argument("--base-url", required=True, help="服务地址,例如 http://127.0.0.1:5231")
    parser.add_argument("--admin-token", default="", help="Admin JWT可选按部署策略")
    parser.add_argument("--platform", default="openai", help="平台过滤,默认 openai")
    parser.add_argument("--time-range", default="30m", help="时间窗口: 5m/30m/1h/6h/24h/7d/30d")
    parser.add_argument("--group-id", default="", help="可选 group_id")
    try:
        args = parser.parse_args(argv)
    except SystemExit as exc:
        # argparse exits with status 2 on usage errors, which CI/CD could not
        # tell apart from "threshold violated" (also 2). Report usage errors
        # as 1 and keep a clean exit (e.g. --help) as 0.
        return 0 if exc.code in (0, None) else 1

    base = args.base_url.rstrip("/")
    headers = build_headers(args.admin_token)

    try:
        threshold_url = f"{base}/api/v1/admin/ops/settings/metric-thresholds"
        thresholds_raw = request_json(threshold_url, headers)
        thresholds = parse_thresholds(parse_envelope_data(thresholds_raw))

        query = {"platform": args.platform, "time_range": args.time_range}
        if args.group_id.strip():
            query["group_id"] = args.group_id.strip()
        overview_url = (
            f"{base}/api/v1/admin/ops/dashboard/overview?"
            + urllib.parse.urlencode(query)
        )
        overview_raw = request_json(overview_url, headers)
        snapshot = parse_snapshot(parse_envelope_data(overview_raw))

        print("[OpenAI OAuth Gray Guard] 当前快照:")
        print(
            json.dumps(
                {
                    "sla": snapshot.sla,
                    "ttft_p99_ms": snapshot.ttft_p99_ms,
                    "request_error_rate_percent": snapshot.request_error_rate_percent,
                    "upstream_error_rate_percent": snapshot.upstream_error_rate_percent,
                },
                ensure_ascii=False,
                indent=2,
            )
        )
        print("[OpenAI OAuth Gray Guard] 阈值配置:")
        print(
            json.dumps(
                {
                    "sla_percent_min": thresholds.sla_percent_min,
                    "ttft_p99_ms_max": thresholds.ttft_p99_ms_max,
                    "request_error_rate_percent_max": thresholds.request_error_rate_percent_max,
                    "upstream_error_rate_percent_max": thresholds.upstream_error_rate_percent_max,
                },
                ensure_ascii=False,
                indent=2,
            )
        )

        violations = evaluate(snapshot, thresholds)
        if violations:
            print("[OpenAI OAuth Gray Guard] 检测到阈值违例:")
            for idx, line in enumerate(violations, start=1):
                print(f" {idx}. {line}")
            print("[OpenAI OAuth Gray Guard] 建议:停止扩量并执行回滚。")
            return 2

        print("[OpenAI OAuth Gray Guard] 指标通过,可继续观察或按计划扩量。")
        return 0
    except Exception as exc:
        # Top-level boundary: any failure while fetching or parsing is
        # reported on stderr and mapped to exit code 1.
        print(f"[OpenAI OAuth Gray Guard] 执行失败: {exc}", file=sys.stderr)
        return 1
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    sys.exit(main())