Files
ai-xn-check/collectors/performance.py
nosqli cdcd69256b feat: AI API 指纹检测对比工具 - 初始版本
- 4维指纹采集: 性能/语言/能力/行为
- models.py 已加入 IdentityFingerprintModel (第5维数据模型)
- comparator.py 已升级为5维评分 (含identity维度比较)
- reporter.py 已加入身份验证报告输出
- main.py 已集成identity采集流程
- identity collector 待下次提交补充完整代码
2026-03-09 00:15:03 +08:00

99 lines
3.5 KiB
Python

"""Performance fingerprint collector — latency, TTFT, TPS, response length."""
import numpy as np
from typing import List
from core.client import AIClient
from core.models import PerformanceFingerprint, CollectionConfig
from utils.tokenizer import estimate_tokens
# 5 standardized prompts of varying complexity, used to sample latency/TTFT/TPS
# across different workload shapes (short factual → long reasoning).
PERFORMANCE_PROMPTS: List[str] = [
# Short, simple arithmetic — minimal generation, dominated by TTFT
"What is 2 + 2? Answer in one sentence.",
# Medium factual recall
"Explain the difference between TCP and UDP protocols in 3-4 sentences.",
# Longer creative generation
"Write a short poem (4-8 lines) about the beauty of mathematics.",
# Technical / code generation
"Write a Python function that checks if a string is a palindrome. Include a brief docstring.",
# Complex reasoning with a length cap
"Compare and contrast merge sort and quicksort algorithms. Discuss time complexity, space complexity, and when to use each. Keep it under 200 words.",
]
async def collect_performance(client: AIClient, config: CollectionConfig,
                              progress_callback=None) -> PerformanceFingerprint:
    """
    Collect a performance fingerprint from an AI API channel.

    Sends each of the 5 standardized prompts ``config.repeat_count`` times
    over the streaming API and aggregates timing/size metrics.

    Args:
        client: Channel client used to send the streaming requests.
        config: Collection settings; ``repeat_count`` and ``max_tokens``
            are read here.
        progress_callback: Optional callable taking a status string.
            Invoked after every attempt (success or failure) so the
            caller can render progress.

    Returns:
        PerformanceFingerprint with raw latencies, latency percentiles
        (p50/p95/p99), average TTFT (ms), average TPS, and per-response
        estimated token lengths. Aggregates are 0.0 when no sample of
        that kind was collected.
    """
    all_latencies: List[float] = []
    all_ttfts: List[float] = []
    all_tps: List[float] = []
    all_response_lengths: List[int] = []

    total_tasks = len(PERFORMANCE_PROMPTS) * config.repeat_count
    completed = 0

    for prompt_idx, prompt in enumerate(PERFORMANCE_PROMPTS):
        for repeat in range(config.repeat_count):
            try:
                # Use streaming so the client can report TTFT and TPS.
                text, metrics, headers = await client.send_message_streaming(
                    prompt=prompt,
                    max_tokens=config.max_tokens,
                )

                # Total latency = timestamp of the last streamed token
                # (seconds -> ms); fall back to TTFT when no per-token
                # timestamps are available.
                if metrics.token_timestamps:
                    total_latency = metrics.token_timestamps[-1] * 1000  # convert to ms
                else:
                    total_latency = metrics.ttft_ms
                all_latencies.append(total_latency)

                # Zero/negative TTFT or TPS means "not measured" — skip so
                # the averages are not dragged toward 0 by missing data.
                if metrics.ttft_ms > 0:
                    all_ttfts.append(metrics.ttft_ms)
                if metrics.tps > 0:
                    all_tps.append(metrics.tps)

                # Estimate response length in tokens.
                all_response_lengths.append(estimate_tokens(text))
            except Exception as e:
                # Best-effort collection: one failed request must not abort
                # the whole fingerprint run; report it and move on.
                if progress_callback:
                    progress_callback(f" ⚠ Prompt {prompt_idx+1} repeat {repeat+1} failed: {e}")
            finally:
                # BUGFIX: the original `continue` in the except branch skipped
                # this increment, so on any failure `completed` could never
                # reach `total_tasks` and the progress display stalled short.
                # Count every attempt, success or not.
                completed += 1
                if progress_callback:
                    progress_callback(f" Performance: {completed}/{total_tasks}")

    # Latency percentiles over all successful attempts (0.0 when none).
    if all_latencies:
        latency_arr = np.array(all_latencies)
        p50 = float(np.percentile(latency_arr, 50))
        p95 = float(np.percentile(latency_arr, 95))
        p99 = float(np.percentile(latency_arr, 99))
    else:
        p50 = p95 = p99 = 0.0

    avg_ttft = float(np.mean(all_ttfts)) if all_ttfts else 0.0
    avg_tps = float(np.mean(all_tps)) if all_tps else 0.0
    avg_resp_len = float(np.mean(all_response_lengths)) if all_response_lengths else 0.0

    return PerformanceFingerprint(
        latencies_ms=all_latencies,
        p50_latency_ms=p50,
        p95_latency_ms=p95,
        p99_latency_ms=p99,
        avg_ttft_ms=avg_ttft,
        avg_tps=avg_tps,
        response_lengths=all_response_lengths,
        avg_response_length=avg_resp_len,
    )