Files
ai-xn-check/analysis/comparator.py
nosqli cdcd69256b feat: AI API 指纹检测对比工具 - 初始版本
- 4维指纹采集: 性能/语言/能力/行为
- models.py 已加入 IdentityFingerprintModel (第5维数据模型)
- comparator.py 已升级为5维评分 (含identity维度比较)
- reporter.py 已加入身份验证报告输出
- main.py 已集成identity采集流程
- identity collector 待下次提交补充完整代码
2026-03-09 00:15:03 +08:00

416 lines
14 KiB
Python

"""Fingerprint comparison engine — 5-dimension scoring (with identity verification)."""
from datetime import datetime
from typing import Dict, List
from core.models import (
FullFingerprint, ComparisonResult, DimensionScore,
PerformanceFingerprint, LanguageFingerprint,
CapabilityFingerprint, BehavioralFingerprint,
IdentityFingerprintModel,
)
from utils.text_analysis import (
jaccard_similarity, dict_cosine_similarity, text_similarity,
)
# Relative weight of each of the five fingerprint dimensions in the
# overall score. The values sum to 1.0; identity carries the largest
# weight since it most directly tests whether the suspect channel is
# the model it claims to be.
WEIGHTS = {
    "performance": 0.15,
    "language": 0.20,
    "capability": 0.20,
    "behavioral": 0.20,
    "identity": 0.25,
}
# Verdict thresholds applied to the weighted overall score:
#   >= GENUINE_THRESHOLD    -> GENUINE
#   >= SUSPICIOUS_THRESHOLD -> SUSPICIOUS
#   otherwise               -> LIKELY FAKE
GENUINE_THRESHOLD = 0.80
SUSPICIOUS_THRESHOLD = 0.60
def numeric_similarity(a: float, b: float) -> float:
    """Return a similarity score in [0, 1] for two numeric values.

    Identical values score 1.0 and the score decreases with the relative
    difference ``|a - b| / max(|a|, |b|)``. The result is clamped at 0.0:
    for opposite-sign inputs ``|a - b|`` can exceed ``max(|a|, |b|)``,
    which previously produced negative scores despite the documented
    0-1 range.
    """
    max_val = max(abs(a), abs(b))
    if max_val == 0:
        # Both values are exactly zero -> perfectly similar.
        return 1.0
    return max(0.0, 1.0 - abs(a - b) / max_val)
def compare_fingerprints(genuine: FullFingerprint, suspect: FullFingerprint) -> ComparisonResult:
    """Score a suspect channel's fingerprint against a genuine baseline.

    Runs the five per-dimension comparators, combines their scores using
    the module-level WEIGHTS table, and maps the weighted total onto a
    three-level verdict string.
    """
    # Table of (comparator, genuine slice, suspect slice) — order matters:
    # it fixes the order of dimension_scores in the result.
    comparators = [
        (_compare_performance, genuine.performance, suspect.performance),
        (_compare_language, genuine.language, suspect.language),
        (_compare_capability, genuine.capability, suspect.capability),
        (_compare_behavioral, genuine.behavioral, suspect.behavioral),
        (_compare_identity, genuine.identity, suspect.identity),
    ]
    dimension_scores = [compare(g_part, s_part) for compare, g_part, s_part in comparators]
    # Weighted sum across all five dimensions (weights sum to 1.0).
    overall = sum(ds.score * ds.weight for ds in dimension_scores)
    # Thresholds map the continuous score onto a verdict label.
    if overall >= GENUINE_THRESHOLD:
        verdict = "✅ GENUINE"
    elif overall >= SUSPICIOUS_THRESHOLD:
        verdict = "⚠️ SUSPICIOUS"
    else:
        verdict = "❌ LIKELY FAKE"
    return ComparisonResult(
        genuine_channel=genuine.channel_name,
        suspect_channel=suspect.channel_name,
        dimension_scores=dimension_scores,
        overall_score=overall,
        verdict=verdict,
        timestamp=datetime.now().isoformat(),
    )
def _compare_performance(g: PerformanceFingerprint, s: PerformanceFingerprint) -> DimensionScore:
    """Score how closely two performance fingerprints match.

    Averages per-metric similarities over P50 latency, throughput (TPS),
    time-to-first-token, and typical response length.
    """
    # (detail key, genuine value, suspect value) for each numeric metric.
    metrics = [
        ("p50_latency_similarity", g.p50_latency_ms, s.p50_latency_ms),
        ("tps_similarity", g.avg_tps, s.avg_tps),
        ("ttft_similarity", g.avg_ttft_ms, s.avg_ttft_ms),
        ("response_length_similarity", g.avg_response_length, s.avg_response_length),
    ]
    details = {}
    scores = []
    for label, g_val, s_val in metrics:
        sim = numeric_similarity(g_val, s_val)
        details[label] = round(sim, 3)
        scores.append(sim)
    avg_score = sum(scores) / len(scores) if scores else 0.0
    details["component_scores"] = [round(v, 3) for v in scores]
    return DimensionScore(
        dimension="Performance",
        score=round(avg_score, 3),
        weight=WEIGHTS["performance"],
        details=details,
    )
def _compare_language(g: LanguageFingerprint, s: LanguageFingerprint) -> DimensionScore:
    """Score linguistic similarity between two channels.

    Combines vocabulary richness, top-bigram overlap, formatting-feature
    cosine similarity, opening/closing phrase similarity, and CJK
    character ratio into an unweighted average.
    """
    def cross_pattern_similarity(g_patterns: List[str], s_patterns: List[str]) -> float:
        # Mean pairwise text similarity over the cartesian product of the
        # two pattern lists; 0.5 (neutral) when either side has no samples.
        if not (g_patterns and s_patterns):
            return 0.5
        pair_scores = [
            text_similarity(gp, sp) for gp in g_patterns for sp in s_patterns
        ]
        return sum(pair_scores) / len(pair_scores) if pair_scores else 0.0

    details = {}
    scores = []
    vocab_sim = numeric_similarity(g.vocab_richness, s.vocab_richness)
    details["vocab_richness_similarity"] = round(vocab_sim, 3)
    scores.append(vocab_sim)
    # Jaccard overlap of the top-bigram key sets (iterating a dict yields keys).
    bigram_sim = jaccard_similarity(set(g.top_bigrams), set(s.top_bigrams))
    details["bigram_overlap"] = round(bigram_sim, 3)
    scores.append(bigram_sim)
    format_sim = dict_cosine_similarity(g.format_features, s.format_features)
    details["format_similarity"] = round(format_sim, 3)
    scores.append(format_sim)
    opening_sim = cross_pattern_similarity(g.opening_patterns, s.opening_patterns)
    details["opening_pattern_similarity"] = round(opening_sim, 3)
    scores.append(opening_sim)
    closing_sim = cross_pattern_similarity(g.closing_patterns, s.closing_patterns)
    details["closing_pattern_similarity"] = round(closing_sim, 3)
    scores.append(closing_sim)
    cjk_sim = numeric_similarity(g.cjk_ratio, s.cjk_ratio)
    details["cjk_ratio_similarity"] = round(cjk_sim, 3)
    scores.append(cjk_sim)
    avg_score = sum(scores) / len(scores) if scores else 0.0
    return DimensionScore(
        dimension="Language",
        score=round(avg_score, 3),
        weight=WEIGHTS["language"],
        details=details,
    )
def _exact_match_rate(g_map: Dict, s_map: Dict) -> float:
    """Fraction of the genuine map's keys whose values match exactly in the suspect map.

    Asymmetric by design: the genuine channel defines the probe set, so
    keys present only on the suspect side are ignored. Returns 0.0 when
    either map is empty (no evidence to compare).
    """
    if not (g_map and s_map):
        return 0.0
    matches = sum(1 for k in g_map if k in s_map and g_map[k] == s_map[k])
    return matches / len(g_map)


def _compare_capability(g: CapabilityFingerprint, s: CapabilityFingerprint) -> DimensionScore:
    """Score capability similarity between two channels.

    Combines fuzzy similarity of knowledge-cutoff answers with exact
    match rates for math, code, and refusal probes. The three exact-match
    stanzas previously duplicated the same logic inline; they now share
    the _exact_match_rate helper.
    """
    details = {}
    scores = []
    # Knowledge answers vary in wording, so compare them with fuzzy text
    # similarity over the probe keys present on both sides.
    if g.knowledge_cutoff_responses and s.knowledge_cutoff_responses:
        knowledge_sims = [
            text_similarity(
                g.knowledge_cutoff_responses[key],
                s.knowledge_cutoff_responses[key],
            )
            for key in g.knowledge_cutoff_responses
            if key in s.knowledge_cutoff_responses
        ]
        knowledge_sim = sum(knowledge_sims) / len(knowledge_sims) if knowledge_sims else 0.0
    else:
        knowledge_sim = 0.0
    details["knowledge_similarity"] = round(knowledge_sim, 3)
    scores.append(knowledge_sim)
    # Math, code, and refusal probes are scored by exact answer match.
    math_sim = _exact_match_rate(g.math_scores, s.math_scores)
    details["math_match_rate"] = round(math_sim, 3)
    scores.append(math_sim)
    code_sim = _exact_match_rate(g.code_scores, s.code_scores)
    details["code_match_rate"] = round(code_sim, 3)
    scores.append(code_sim)
    refusal_sim = _exact_match_rate(g.refusal_patterns, s.refusal_patterns)
    details["refusal_match_rate"] = round(refusal_sim, 3)
    scores.append(refusal_sim)
    avg_score = sum(scores) / len(scores) if scores else 0.0
    return DimensionScore(
        dimension="Capability",
        score=round(avg_score, 3),
        weight=WEIGHTS["capability"],
        details=details,
    )
def _compare_behavioral(g: BehavioralFingerprint, s: BehavioralFingerprint) -> DimensionScore:
    """Score behavioral similarity between two channels.

    Combines self-consistency, instruction-compliance match rate, and
    HTTP response-header fingerprint similarity.
    """
    details = {}
    scores = []
    # Self-consistency: compare the two channels' mean consistency scores.
    if g.consistency_scores and s.consistency_scores:
        mean_g = sum(g.consistency_scores) / len(g.consistency_scores)
        mean_s = sum(s.consistency_scores) / len(s.consistency_scores)
        consistency_sim = numeric_similarity(mean_g, mean_s)
    else:
        consistency_sim = 0.5  # neutral when either side has no samples
    details["consistency_similarity"] = round(consistency_sim, 3)
    scores.append(consistency_sim)
    # Instruction compliance: exact match rate over the genuine probe set.
    if g.instruction_compliance and s.instruction_compliance:
        matched = sum(
            1
            for key, expected in g.instruction_compliance.items()
            if key in s.instruction_compliance
            and s.instruction_compliance[key] == expected
        )
        compliance_sim = matched / len(g.instruction_compliance)
    else:
        compliance_sim = 0.0
    details["compliance_match_rate"] = round(compliance_sim, 3)
    scores.append(compliance_sim)
    # Header fingerprint: blend of key-set overlap and exact value equality,
    # with key presence weighted slightly higher than values.
    header_sim = 0.5  # neutral default when header data is missing
    if g.response_headers and s.response_headers:
        g_keys = set(g.response_headers)
        s_keys = set(s.response_headers)
        shared = g_keys & s_keys
        union = g_keys | s_keys
        if union:
            key_sim = len(shared) / len(union)
            if shared:
                value_hits = sum(
                    1 for k in shared if g.response_headers[k] == s.response_headers[k]
                )
                value_sim = value_hits / len(shared)
            else:
                value_sim = 0.0
            header_sim = 0.6 * key_sim + 0.4 * value_sim
    details["header_similarity"] = round(header_sim, 3)
    scores.append(header_sim)
    avg_score = sum(scores) / len(scores) if scores else 0.0
    return DimensionScore(
        dimension="Behavioral",
        score=round(avg_score, 3),
        weight=WEIGHTS["behavioral"],
        details=details,
    )
def _compare_identity(g: IdentityFingerprintModel, s: IdentityFingerprintModel) -> DimensionScore:
    """Score identity-verification agreement between two channels.

    Checks whether both channels claim (and are detected as) the same
    model and developer, compares their model/vocabulary scoring
    profiles, and treats a system-prompt leak on the suspect side as a
    strong fake signal.
    """
    def exact_or_neutral(a, b) -> float:
        # 1.0 on exact agreement, 0.0 on disagreement, 0.5 (neutral)
        # when either side is missing — cannot determine.
        if not (a and b):
            return 0.5
        return 1.0 if a == b else 0.0

    def float_profile(raw: Dict) -> Dict[str, float]:
        # Cosine similarity expects numeric vectors; coerce values to float.
        return {k: float(v) for k, v in raw.items()}

    details = {}
    scores = []
    # 1. Identity claim consistency — do both claim to be the same model?
    identity_claim_match = exact_or_neutral(g.claimed_identity, s.claimed_identity)
    details["identity_claim_match"] = round(identity_claim_match, 3)
    details["genuine_claims"] = g.claimed_identity or "unknown"
    details["suspect_claims"] = s.claimed_identity or "unknown"
    scores.append(identity_claim_match)
    # 2. Developer claim consistency.
    developer_match = exact_or_neutral(g.claimed_developer, s.claimed_developer)
    details["developer_match"] = round(developer_match, 3)
    scores.append(developer_match)
    # 3. Detected model agreement.
    detected_match = exact_or_neutral(g.detected_model, s.detected_model)
    details["detected_model_match"] = round(detected_match, 3)
    details["genuine_detected"] = g.detected_model or "unknown"
    details["suspect_detected"] = s.detected_model or "unknown"
    scores.append(detected_match)
    # 4. Model score profile similarity (cosine over score vectors).
    if g.model_scores and s.model_scores:
        model_profile_sim = dict_cosine_similarity(
            float_profile(g.model_scores), float_profile(s.model_scores)
        )
    else:
        model_profile_sim = 0.5
    details["model_profile_similarity"] = round(model_profile_sim, 3)
    scores.append(model_profile_sim)
    # 5. Vocabulary marker similarity.
    if g.vocab_markers and s.vocab_markers:
        vocab_sim = dict_cosine_similarity(
            float_profile(g.vocab_markers), float_profile(s.vocab_markers)
        )
    else:
        vocab_sim = 0.5
    details["vocab_marker_similarity"] = round(vocab_sim, 3)
    scores.append(vocab_sim)
    # 6. System prompt leak — a leak only on the suspect side is a red flag.
    if s.system_prompt_leaked and not g.system_prompt_leaked:
        system_prompt_penalty = 0.0
        details["system_prompt_alert"] = "⚠️ Suspect leaked system prompt!"
    elif s.system_prompt_leaked == g.system_prompt_leaked:
        system_prompt_penalty = 1.0  # both leaked or neither leaked
    else:
        system_prompt_penalty = 0.5  # genuine leaked but suspect did not
    details["system_prompt_score"] = round(system_prompt_penalty, 3)
    scores.append(system_prompt_penalty)
    # 7. Is-claimed-model flags: agreement scores high, mismatch is suspicious.
    if g.is_claimed_model and s.is_claimed_model:
        claimed_model_score = 1.0
    elif not (g.is_claimed_model or s.is_claimed_model):
        claimed_model_score = 0.8  # both seem off in the same way
    else:
        claimed_model_score = 0.2  # one passes, the other fails
    details["is_claimed_model_match"] = round(claimed_model_score, 3)
    scores.append(claimed_model_score)
    # Surface any mismatch reasons recorded during collection.
    if s.identity_mismatch_reasons:
        details["suspect_mismatch_reasons"] = s.identity_mismatch_reasons
    if g.identity_mismatch_reasons:
        details["genuine_mismatch_reasons"] = g.identity_mismatch_reasons
    avg_score = sum(scores) / len(scores) if scores else 0.0
    return DimensionScore(
        dimension="Identity",
        score=round(avg_score, 3),
        weight=WEIGHTS["identity"],
        details=details,
    )