# NOTE(review): stray commit-message text accidentally pasted into this file;
# kept (translated) as a comment so the module still parses — remove once
# confirmed it is not needed:
#   4-dimensional fingerprint collection: performance/language/capability/behavior.
#   models.py: added IdentityFingerprintModel (5th-dimension data model).
#   comparator.py: upgraded to 5-dimension scoring (incl. identity comparison).
#   reporter.py: added identity-verification report output.
#   main.py: integrated the identity collection flow.
#   identity collector: full code to follow in the next commit.
# (File metadata residue: 99 lines, 3.5 KiB, Python.)
"""Performance fingerprint collector — latency, TTFT, TPS, response length."""
|
|
|
|
import numpy as np
|
|
from typing import List
|
|
from core.client import AIClient
|
|
from core.models import PerformanceFingerprint, CollectionConfig
|
|
from utils.tokenizer import estimate_tokens
|
|
|
|
|
|
# 5 standardized prompts of varying complexity, used to sample latency/TTFT/TPS
# across short, medium, creative, technical, and reasoning-heavy workloads.
PERFORMANCE_PROMPTS = [
    # Short, simple
    "What is 2 + 2? Answer in one sentence.",
    # Medium factual
    "Explain the difference between TCP and UDP protocols in 3-4 sentences.",
    # Longer creative
    "Write a short poem (4-8 lines) about the beauty of mathematics.",
    # Technical
    "Write a Python function that checks if a string is a palindrome. Include a brief docstring.",
    # Complex reasoning
    "Compare and contrast merge sort and quicksort algorithms. Discuss time complexity, space complexity, and when to use each. Keep it under 200 words.",
]
async def collect_performance(client: AIClient, config: CollectionConfig,
                              progress_callback=None) -> PerformanceFingerprint:
    """
    Collect a performance fingerprint from an AI API channel.

    Sends each of the 5 standardized prompts ``config.repeat_count`` times
    through the streaming client (so TTFT/TPS can be measured), then
    aggregates the per-request samples into percentile and mean statistics.

    Args:
        client: API client; only ``send_message_streaming`` is used.
        config: Collection settings; ``repeat_count`` and ``max_tokens`` are read.
        progress_callback: Optional ``callable(str)`` receiving progress and
            warning messages.

    Returns:
        PerformanceFingerprint with the raw samples and aggregate statistics.
        Failed requests are reported via the callback and excluded from the
        aggregates (best-effort collection — a failure never aborts the run).
    """
    all_latencies: List[float] = []
    all_ttfts: List[float] = []
    all_tps: List[float] = []
    all_response_lengths: List[int] = []

    total_tasks = len(PERFORMANCE_PROMPTS) * config.repeat_count
    completed = 0

    for prompt_idx, prompt in enumerate(PERFORMANCE_PROMPTS):
        for repeat in range(config.repeat_count):
            try:
                # Use streaming to get TTFT and TPS metrics
                text, metrics, headers = await client.send_message_streaming(
                    prompt=prompt,
                    max_tokens=config.max_tokens,
                )

                # Total latency = timestamp of the last token; fall back to
                # TTFT when the backend reported no per-token timestamps.
                if metrics.token_timestamps:
                    total_latency = metrics.token_timestamps[-1] * 1000  # s -> ms
                else:
                    total_latency = metrics.ttft_ms

                all_latencies.append(total_latency)

                # Zero values mean "not measured"; keep them out of the means.
                if metrics.ttft_ms > 0:
                    all_ttfts.append(metrics.ttft_ms)
                if metrics.tps > 0:
                    all_tps.append(metrics.tps)

                # Estimate response length in tokens
                all_response_lengths.append(estimate_tokens(text))
            except Exception as e:
                # Broad catch is deliberate: network/client errors vary and a
                # single bad request must not abort the whole collection.
                if progress_callback:
                    progress_callback(f" ⚠ Prompt {prompt_idx+1} repeat {repeat+1} failed: {e}")
            finally:
                # BUGFIX: count the attempt even on failure — previously the
                # except branch `continue`d past the counter, so the progress
                # line never reached total_tasks once any request failed.
                completed += 1
                if progress_callback:
                    progress_callback(f" Performance: {completed}/{total_tasks}")

    # Calculate latency percentiles (0.0 when every request failed)
    if all_latencies:
        latency_arr = np.array(all_latencies)
        p50 = float(np.percentile(latency_arr, 50))
        p95 = float(np.percentile(latency_arr, 95))
        p99 = float(np.percentile(latency_arr, 99))
    else:
        p50 = p95 = p99 = 0.0

    avg_ttft = float(np.mean(all_ttfts)) if all_ttfts else 0.0
    avg_tps = float(np.mean(all_tps)) if all_tps else 0.0
    avg_resp_len = float(np.mean(all_response_lengths)) if all_response_lengths else 0.0

    return PerformanceFingerprint(
        latencies_ms=all_latencies,
        p50_latency_ms=p50,
        p95_latency_ms=p95,
        p99_latency_ms=p99,
        avg_ttft_ms=avg_ttft,
        avg_tps=avg_tps,
        response_lengths=all_response_lengths,
        avg_response_length=avg_resp_len,
    )