Files
ai-xn-check/core/models.py
nosqli cdcd69256b feat: AI API 指纹检测对比工具 - 初始版本
- 4维指纹采集: 性能/语言/能力/行为
- models.py 已加入 IdentityFingerprintModel (第5维数据模型)
- comparator.py 已升级为5维评分 (含identity维度比较)
- reporter.py 已加入身份验证报告输出
- main.py 已集成identity采集流程
- identity collector 待下次提交补充完整代码
2026-03-09 00:15:03 +08:00

279 lines
11 KiB
Python

"""Data models for AI API fingerprint detection."""
from dataclasses import dataclass, field
from typing import Dict, List, Optional
@dataclass
class ChannelConfig:
    """Configuration for a single API channel.

    All three fields are required and are accepted positionally in the
    order ``base_url``, ``api_key``, ``model``.
    """

    base_url: str
    # repr=False keeps the credential out of the auto-generated repr(), so
    # printing or logging a config (or a traceback that shows locals) does
    # not leak the key. Construction and equality are unaffected.
    api_key: str = field(repr=False)
    model: str
@dataclass
class CollectionConfig:
    """Tunable settings for a data-collection run.

    Every field has a default, so ``CollectionConfig()`` is a usable
    configuration on its own.
    """

    # NOTE(review): presumably the number of times each probe is repeated
    # -- confirm against the collectors.
    repeat_count: int = 3
    # NOTE(review): unit is not stated here; presumably seconds -- confirm.
    timeout: float = 60
    max_tokens: int = 1024
    # NOTE(review): looks like the value for an "anthropic-version" request
    # header -- confirm where it is sent.
    anthropic_version: str = "2023-06-01"
@dataclass
class PerformanceFingerprint:
    """Performance metrics fingerprint.

    Stores raw per-request samples (``latencies_ms``, ``response_lengths``)
    next to summary values. The class only carries the numbers; it does not
    compute the summaries from the samples.
    """

    latencies_ms: List[float] = field(default_factory=list)
    p50_latency_ms: float = 0.0
    p95_latency_ms: float = 0.0
    p99_latency_ms: float = 0.0
    # NOTE(review): "ttft" / "tps" presumably mean time-to-first-token and
    # tokens-per-second -- confirm against the performance collector.
    avg_ttft_ms: float = 0.0
    avg_tps: float = 0.0
    response_lengths: List[int] = field(default_factory=list)
    avg_response_length: float = 0.0

    def to_dict(self) -> dict:
        """Return the fingerprint as a plain dict keyed by field name.

        The list fields are copied, so mutating the returned dict cannot
        change this instance.
        """
        return {
            "latencies_ms": list(self.latencies_ms),
            "p50_latency_ms": self.p50_latency_ms,
            "p95_latency_ms": self.p95_latency_ms,
            "p99_latency_ms": self.p99_latency_ms,
            "avg_ttft_ms": self.avg_ttft_ms,
            "avg_tps": self.avg_tps,
            "response_lengths": list(self.response_lengths),
            "avg_response_length": self.avg_response_length,
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "PerformanceFingerprint":
        """Build a fingerprint from a dict shaped like ``to_dict`` output.

        Args:
            data: Mapping of field name to value. ``None`` is treated as an
                empty mapping.

        Returns:
            A new instance. Missing keys and explicit ``None`` values (for
            example JSON ``null``) both fall back to the field default.
            List values are copied so the instance never shares a list with
            ``data``.
        """
        data = data or {}
        # ``dict.get(key, default)`` only applies the default when the key
        # is absent, so ``or`` is used to also cover a present-but-None value.
        return cls(
            latencies_ms=list(data.get("latencies_ms") or []),
            p50_latency_ms=data.get("p50_latency_ms") or 0.0,
            p95_latency_ms=data.get("p95_latency_ms") or 0.0,
            p99_latency_ms=data.get("p99_latency_ms") or 0.0,
            avg_ttft_ms=data.get("avg_ttft_ms") or 0.0,
            avg_tps=data.get("avg_tps") or 0.0,
            response_lengths=list(data.get("response_lengths") or []),
            avg_response_length=data.get("avg_response_length") or 0.0,
        )
@dataclass
class LanguageFingerprint:
    """Language pattern fingerprint.

    A plain container for text-style measurements; the values are supplied
    by the caller and are not computed here.
    """

    vocab_richness: float = 0.0
    top_bigrams: Dict[str, int] = field(default_factory=dict)
    format_features: Dict[str, float] = field(default_factory=dict)
    opening_patterns: List[str] = field(default_factory=list)
    closing_patterns: List[str] = field(default_factory=list)
    # NOTE(review): presumably the share of CJK characters in the sampled
    # text -- confirm against the language collector.
    cjk_ratio: float = 0.0

    def to_dict(self) -> dict:
        """Return the fingerprint as a plain dict keyed by field name.

        Dict and list fields are copied, so mutating the returned dict
        cannot change this instance.
        """
        return {
            "vocab_richness": self.vocab_richness,
            "top_bigrams": dict(self.top_bigrams),
            "format_features": dict(self.format_features),
            "opening_patterns": list(self.opening_patterns),
            "closing_patterns": list(self.closing_patterns),
            "cjk_ratio": self.cjk_ratio,
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "LanguageFingerprint":
        """Build a fingerprint from a dict shaped like ``to_dict`` output.

        Args:
            data: Mapping of field name to value. ``None`` is treated as an
                empty mapping.

        Returns:
            A new instance. Missing keys and explicit ``None`` values (for
            example JSON ``null``) both fall back to the field default.
            Containers are copied so the instance never shares them with
            ``data``.
        """
        data = data or {}
        # ``or`` also covers keys that are present but hold None, which the
        # two-argument form of dict.get would pass through unchanged.
        return cls(
            vocab_richness=data.get("vocab_richness") or 0.0,
            top_bigrams=dict(data.get("top_bigrams") or {}),
            format_features=dict(data.get("format_features") or {}),
            opening_patterns=list(data.get("opening_patterns") or []),
            closing_patterns=list(data.get("closing_patterns") or []),
            cjk_ratio=data.get("cjk_ratio") or 0.0,
        )
@dataclass
class CapabilityFingerprint:
    """Capability test fingerprint.

    Four string-keyed mappings of test results. What the keys identify is
    not visible in this class.
    NOTE(review): keys are presumably probe/test names -- confirm against
    the capability collector.
    """

    knowledge_cutoff_responses: Dict[str, str] = field(default_factory=dict)
    math_scores: Dict[str, bool] = field(default_factory=dict)
    code_scores: Dict[str, bool] = field(default_factory=dict)
    refusal_patterns: Dict[str, bool] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the fingerprint as a plain dict keyed by field name.

        Each mapping is copied, so mutating the returned dict cannot change
        this instance.
        """
        return {
            "knowledge_cutoff_responses": dict(self.knowledge_cutoff_responses),
            "math_scores": dict(self.math_scores),
            "code_scores": dict(self.code_scores),
            "refusal_patterns": dict(self.refusal_patterns),
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "CapabilityFingerprint":
        """Build a fingerprint from a dict shaped like ``to_dict`` output.

        Args:
            data: Mapping of field name to value. ``None`` is treated as an
                empty mapping.

        Returns:
            A new instance. Missing keys and explicit ``None`` values (for
            example JSON ``null``) both become empty dicts. Mappings are
            copied so the instance never shares them with ``data``.
        """
        data = data or {}
        return cls(
            knowledge_cutoff_responses=dict(data.get("knowledge_cutoff_responses") or {}),
            math_scores=dict(data.get("math_scores") or {}),
            code_scores=dict(data.get("code_scores") or {}),
            refusal_patterns=dict(data.get("refusal_patterns") or {}),
        )
@dataclass
class BehavioralFingerprint:
    """Behavioral pattern fingerprint.

    A plain container: a list of consistency scores plus two string-keyed
    mappings (instruction compliance flags and response headers).
    """

    consistency_scores: List[float] = field(default_factory=list)
    instruction_compliance: Dict[str, bool] = field(default_factory=dict)
    response_headers: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the fingerprint as a plain dict keyed by field name.

        Containers are copied, so mutating the returned dict cannot change
        this instance.
        """
        return {
            "consistency_scores": list(self.consistency_scores),
            "instruction_compliance": dict(self.instruction_compliance),
            "response_headers": dict(self.response_headers),
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "BehavioralFingerprint":
        """Build a fingerprint from a dict shaped like ``to_dict`` output.

        Args:
            data: Mapping of field name to value. ``None`` is treated as an
                empty mapping.

        Returns:
            A new instance. Missing keys and explicit ``None`` values (for
            example JSON ``null``) both fall back to empty containers.
            Containers are copied so the instance never shares them with
            ``data``.
        """
        data = data or {}
        return cls(
            consistency_scores=list(data.get("consistency_scores") or []),
            instruction_compliance=dict(data.get("instruction_compliance") or {}),
            response_headers=dict(data.get("response_headers") or {}),
        )
@dataclass
class IdentityFingerprintModel:
    """Identity verification fingerprint, stored in FullFingerprint.

    This is a lightweight model for serialization; the full
    IdentityFingerprint lives in collectors/identity.py and is converted
    to/from this for storage.
    """

    claimed_identity: str = ""
    claimed_developer: str = ""
    identity_consistency: float = 0.0
    detected_model: str = ""
    detection_confidence: float = 0.0
    model_scores: Dict[str, float] = field(default_factory=dict)
    vocab_markers: Dict[str, int] = field(default_factory=dict)
    marker_details: Dict[str, List[str]] = field(default_factory=dict)
    signature_behaviors: Dict[str, str] = field(default_factory=dict)
    system_prompt_leaked: bool = False
    system_prompt_hints: List[str] = field(default_factory=list)
    knowledge_results: Dict[str, bool] = field(default_factory=dict)
    identity_responses: Dict[str, str] = field(default_factory=dict)
    # Defaults to True, so an instance built with no identity data reports
    # the model as matching its claim.
    is_claimed_model: bool = True
    identity_mismatch_reasons: List[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the fingerprint as a plain dict keyed by field name.

        Dict and list fields are copied (including the lists nested inside
        ``marker_details``), so mutating the returned dict cannot change
        this instance.
        """
        return {
            "claimed_identity": self.claimed_identity,
            "claimed_developer": self.claimed_developer,
            "identity_consistency": self.identity_consistency,
            "detected_model": self.detected_model,
            "detection_confidence": self.detection_confidence,
            "model_scores": dict(self.model_scores),
            "vocab_markers": dict(self.vocab_markers),
            # Non-list values are passed through untouched rather than
            # coerced, so unexpected data is never mangled or rejected here.
            "marker_details": {
                key: list(value) if isinstance(value, list) else value
                for key, value in self.marker_details.items()
            },
            "signature_behaviors": dict(self.signature_behaviors),
            "system_prompt_leaked": self.system_prompt_leaked,
            "system_prompt_hints": list(self.system_prompt_hints),
            "knowledge_results": dict(self.knowledge_results),
            "identity_responses": dict(self.identity_responses),
            "is_claimed_model": self.is_claimed_model,
            "identity_mismatch_reasons": list(self.identity_mismatch_reasons),
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "IdentityFingerprintModel":
        """Build a fingerprint from a dict shaped like ``to_dict`` output.

        Args:
            data: Mapping of field name to value. ``None`` is treated as an
                empty mapping.

        Returns:
            A new instance. Missing keys and explicit ``None`` values (for
            example JSON ``null``) both fall back to the field default.
            Containers are copied so the instance never shares them with
            ``data``.
        """
        data = data or {}
        # This field defaults to True, so ``or`` cannot be used: it would
        # turn a stored False back into True. Only None means "not given".
        claimed_flag = data.get("is_claimed_model")
        raw_details = data.get("marker_details") or {}
        return cls(
            claimed_identity=data.get("claimed_identity") or "",
            claimed_developer=data.get("claimed_developer") or "",
            identity_consistency=data.get("identity_consistency") or 0.0,
            detected_model=data.get("detected_model") or "",
            detection_confidence=data.get("detection_confidence") or 0.0,
            model_scores=dict(data.get("model_scores") or {}),
            vocab_markers=dict(data.get("vocab_markers") or {}),
            marker_details={
                key: list(value) if isinstance(value, list) else value
                for key, value in raw_details.items()
            },
            signature_behaviors=dict(data.get("signature_behaviors") or {}),
            system_prompt_leaked=data.get("system_prompt_leaked") or False,
            system_prompt_hints=list(data.get("system_prompt_hints") or []),
            knowledge_results=dict(data.get("knowledge_results") or {}),
            identity_responses=dict(data.get("identity_responses") or {}),
            is_claimed_model=True if claimed_flag is None else claimed_flag,
            identity_mismatch_reasons=list(data.get("identity_mismatch_reasons") or []),
        )
@dataclass
class FullFingerprint:
    """Complete fingerprint combining all dimensions.

    Groups one fingerprint object per dimension (performance, language,
    capability, behavioral, identity) together with the channel name, a
    timestamp string and the raw responses.
    """

    channel_name: str = ""
    # NOTE(review): stored as an opaque string; the format is decided by
    # whoever fills it in -- confirm before parsing.
    timestamp: str = ""
    performance: PerformanceFingerprint = field(default_factory=PerformanceFingerprint)
    language: LanguageFingerprint = field(default_factory=LanguageFingerprint)
    capability: CapabilityFingerprint = field(default_factory=CapabilityFingerprint)
    behavioral: BehavioralFingerprint = field(default_factory=BehavioralFingerprint)
    identity: IdentityFingerprintModel = field(default_factory=IdentityFingerprintModel)
    raw_responses: Dict[str, list] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the full fingerprint as a nested plain dict.

        Each dimension is serialized through its own ``to_dict``.
        ``raw_responses`` is copied one level deep (the mapping and each
        list value), so mutating the result cannot change this instance;
        the items inside those lists are still shared.
        """
        return {
            "channel_name": self.channel_name,
            "timestamp": self.timestamp,
            "performance": self.performance.to_dict(),
            "language": self.language.to_dict(),
            "capability": self.capability.to_dict(),
            "behavioral": self.behavioral.to_dict(),
            "identity": self.identity.to_dict(),
            # Non-list values are passed through untouched rather than
            # coerced, so unexpected data is never mangled here.
            "raw_responses": {
                key: list(value) if isinstance(value, list) else value
                for key, value in self.raw_responses.items()
            },
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "FullFingerprint":
        """Build a full fingerprint from a dict shaped like ``to_dict`` output.

        Args:
            data: Mapping of field name to value. ``None`` is treated as an
                empty mapping.

        Returns:
            A new instance. A dimension that is missing *or* explicitly
            ``None`` (for example JSON ``null``) is built from an empty
            mapping, i.e. it gets that dimension's defaults.
        """
        data = data or {}
        # ``data.get(key, {})`` would hand a present-but-None section to the
        # nested from_dict, which then fails on ``None.get``; ``or {}``
        # covers both the missing and the null case.
        raw = data.get("raw_responses") or {}
        return cls(
            channel_name=data.get("channel_name") or "",
            timestamp=data.get("timestamp") or "",
            performance=PerformanceFingerprint.from_dict(data.get("performance") or {}),
            language=LanguageFingerprint.from_dict(data.get("language") or {}),
            capability=CapabilityFingerprint.from_dict(data.get("capability") or {}),
            behavioral=BehavioralFingerprint.from_dict(data.get("behavioral") or {}),
            identity=IdentityFingerprintModel.from_dict(data.get("identity") or {}),
            raw_responses={
                key: list(value) if isinstance(value, list) else value
                for key, value in raw.items()
            },
        )
@dataclass
class DimensionScore:
    """Score for a single comparison dimension.

    Holds the dimension name, its score, the weight attached to it, and a
    free-form ``details`` mapping. Value ranges are not enforced here.
    """

    dimension: str = ""
    score: float = 0.0
    weight: float = 0.0
    details: Dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the score as a plain dict keyed by field name.

        ``details`` is shallow-copied, so adding or removing keys on the
        returned mapping cannot change this instance.
        """
        return {
            "dimension": self.dimension,
            "score": self.score,
            "weight": self.weight,
            "details": dict(self.details),
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "DimensionScore":
        """Build a score from a dict shaped like ``to_dict`` output.

        Mirrors the ``from_dict`` constructors on the fingerprint models so
        a serialized score can be loaded back.

        Args:
            data: Mapping of field name to value. ``None`` is treated as an
                empty mapping.

        Returns:
            A new instance. Missing keys and explicit ``None`` values both
            fall back to the field default; ``details`` is shallow-copied.
        """
        data = data or {}
        return cls(
            dimension=data.get("dimension") or "",
            score=data.get("score") or 0.0,
            weight=data.get("weight") or 0.0,
            details=dict(data.get("details") or {}),
        )
@dataclass
class ComparisonResult:
    """Final comparison result across all dimensions.

    Names the two channels that were compared and carries the per-dimension
    scores, the overall score, a verdict string and a timestamp string.
    """

    genuine_channel: str = ""
    suspect_channel: str = ""
    dimension_scores: List[DimensionScore] = field(default_factory=list)
    overall_score: float = 0.0
    verdict: str = ""
    timestamp: str = ""

    def to_dict(self) -> dict:
        """Return the result as a nested plain dict.

        Each entry of ``dimension_scores`` is serialized through its own
        ``to_dict``; the other fields are copied over by name.
        """
        return {
            "genuine_channel": self.genuine_channel,
            "suspect_channel": self.suspect_channel,
            "dimension_scores": [ds.to_dict() for ds in self.dimension_scores],
            "overall_score": self.overall_score,
            "verdict": self.verdict,
            "timestamp": self.timestamp,
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "ComparisonResult":
        """Build a result from a dict shaped like ``to_dict`` output.

        Mirrors the ``from_dict`` constructors on the fingerprint models so
        a serialized comparison can be loaded back.

        Args:
            data: Mapping of field name to value. ``None`` is treated as an
                empty mapping.

        Returns:
            A new instance. Missing keys and explicit ``None`` values both
            fall back to the field default. Every ``dimension_scores`` entry
            is expected to be a mapping and is rebuilt as a
            ``DimensionScore``; ``None`` entries become default scores.
        """
        data = data or {}
        # Built through the DimensionScore constructor directly so this
        # method relies only on the dataclass fields of DimensionScore.
        scores = [
            DimensionScore(
                dimension=(entry or {}).get("dimension") or "",
                score=(entry or {}).get("score") or 0.0,
                weight=(entry or {}).get("weight") or 0.0,
                details=dict((entry or {}).get("details") or {}),
            )
            for entry in data.get("dimension_scores") or []
        ]
        return cls(
            genuine_channel=data.get("genuine_channel") or "",
            suspect_channel=data.get("suspect_channel") or "",
            dimension_scores=scores,
            overall_score=data.get("overall_score") or 0.0,
            verdict=data.get("verdict") or "",
            timestamp=data.get("timestamp") or "",
        )