"""Data models for AI API fingerprint detection.""" from dataclasses import dataclass, field from typing import Dict, List, Optional @dataclass class ChannelConfig: """Configuration for a single API channel.""" base_url: str api_key: str model: str @dataclass class CollectionConfig: """Configuration for data collection.""" repeat_count: int = 3 timeout: float = 60 max_tokens: int = 1024 anthropic_version: str = "2023-06-01" @dataclass class PerformanceFingerprint: """Performance metrics fingerprint.""" latencies_ms: List[float] = field(default_factory=list) p50_latency_ms: float = 0.0 p95_latency_ms: float = 0.0 p99_latency_ms: float = 0.0 avg_ttft_ms: float = 0.0 avg_tps: float = 0.0 response_lengths: List[int] = field(default_factory=list) avg_response_length: float = 0.0 def to_dict(self) -> dict: return { "latencies_ms": self.latencies_ms, "p50_latency_ms": self.p50_latency_ms, "p95_latency_ms": self.p95_latency_ms, "p99_latency_ms": self.p99_latency_ms, "avg_ttft_ms": self.avg_ttft_ms, "avg_tps": self.avg_tps, "response_lengths": self.response_lengths, "avg_response_length": self.avg_response_length, } @classmethod def from_dict(cls, data: dict) -> "PerformanceFingerprint": return cls( latencies_ms=data.get("latencies_ms", []), p50_latency_ms=data.get("p50_latency_ms", 0.0), p95_latency_ms=data.get("p95_latency_ms", 0.0), p99_latency_ms=data.get("p99_latency_ms", 0.0), avg_ttft_ms=data.get("avg_ttft_ms", 0.0), avg_tps=data.get("avg_tps", 0.0), response_lengths=data.get("response_lengths", []), avg_response_length=data.get("avg_response_length", 0.0), ) @dataclass class LanguageFingerprint: """Language pattern fingerprint.""" vocab_richness: float = 0.0 top_bigrams: Dict[str, int] = field(default_factory=dict) format_features: Dict[str, float] = field(default_factory=dict) opening_patterns: List[str] = field(default_factory=list) closing_patterns: List[str] = field(default_factory=list) cjk_ratio: float = 0.0 def to_dict(self) -> dict: return { "vocab_richness": self.vocab_richness, "top_bigrams": self.top_bigrams, "format_features": self.format_features, "opening_patterns": self.opening_patterns, "closing_patterns": self.closing_patterns, "cjk_ratio": self.cjk_ratio, } @classmethod def from_dict(cls, data: dict) -> "LanguageFingerprint": return cls( vocab_richness=data.get("vocab_richness", 0.0), top_bigrams=data.get("top_bigrams", {}), format_features=data.get("format_features", {}), opening_patterns=data.get("opening_patterns", []), closing_patterns=data.get("closing_patterns", []), cjk_ratio=data.get("cjk_ratio", 0.0), ) @dataclass class CapabilityFingerprint: """Capability test fingerprint.""" knowledge_cutoff_responses: Dict[str, str] = field(default_factory=dict) math_scores: Dict[str, bool] = field(default_factory=dict) code_scores: Dict[str, bool] = field(default_factory=dict) refusal_patterns: Dict[str, bool] = field(default_factory=dict) def to_dict(self) -> dict: return { "knowledge_cutoff_responses": self.knowledge_cutoff_responses, "math_scores": self.math_scores, "code_scores": self.code_scores, "refusal_patterns": self.refusal_patterns, } @classmethod def from_dict(cls, data: dict) -> "CapabilityFingerprint": return cls( knowledge_cutoff_responses=data.get("knowledge_cutoff_responses", {}), math_scores=data.get("math_scores", {}), code_scores=data.get("code_scores", {}), refusal_patterns=data.get("refusal_patterns", {}), ) @dataclass class BehavioralFingerprint: """Behavioral pattern fingerprint.""" consistency_scores: List[float] = field(default_factory=list) instruction_compliance: Dict[str, bool] = field(default_factory=dict) response_headers: Dict[str, str] = field(default_factory=dict) def to_dict(self) -> dict: return { "consistency_scores": self.consistency_scores, "instruction_compliance": self.instruction_compliance, "response_headers": self.response_headers, } @classmethod def from_dict(cls, data: dict) -> "BehavioralFingerprint": return cls( consistency_scores=data.get("consistency_scores", []), instruction_compliance=data.get("instruction_compliance", {}), response_headers=data.get("response_headers", {}), ) @dataclass class IdentityFingerprintModel: """Identity verification fingerprint — stored in FullFingerprint. This is a lightweight model for serialization; the full IdentityFingerprint lives in collectors/identity.py and is converted to/from this for storage. """ claimed_identity: str = "" claimed_developer: str = "" identity_consistency: float = 0.0 detected_model: str = "" detection_confidence: float = 0.0 model_scores: Dict[str, float] = field(default_factory=dict) vocab_markers: Dict[str, int] = field(default_factory=dict) marker_details: Dict[str, List[str]] = field(default_factory=dict) signature_behaviors: Dict[str, str] = field(default_factory=dict) system_prompt_leaked: bool = False system_prompt_hints: List[str] = field(default_factory=list) knowledge_results: Dict[str, bool] = field(default_factory=dict) identity_responses: Dict[str, str] = field(default_factory=dict) is_claimed_model: bool = True identity_mismatch_reasons: List[str] = field(default_factory=list) def to_dict(self) -> dict: return { "claimed_identity": self.claimed_identity, "claimed_developer": self.claimed_developer, "identity_consistency": self.identity_consistency, "detected_model": self.detected_model, "detection_confidence": self.detection_confidence, "model_scores": self.model_scores, "vocab_markers": self.vocab_markers, "marker_details": self.marker_details, "signature_behaviors": self.signature_behaviors, "system_prompt_leaked": self.system_prompt_leaked, "system_prompt_hints": self.system_prompt_hints, "knowledge_results": self.knowledge_results, "identity_responses": self.identity_responses, "is_claimed_model": self.is_claimed_model, "identity_mismatch_reasons": self.identity_mismatch_reasons, } @classmethod def from_dict(cls, data: dict) -> "IdentityFingerprintModel": return cls( claimed_identity=data.get("claimed_identity", ""), claimed_developer=data.get("claimed_developer", ""), identity_consistency=data.get("identity_consistency", 0.0), detected_model=data.get("detected_model", ""), detection_confidence=data.get("detection_confidence", 0.0), model_scores=data.get("model_scores", {}), vocab_markers=data.get("vocab_markers", {}), marker_details=data.get("marker_details", {}), signature_behaviors=data.get("signature_behaviors", {}), system_prompt_leaked=data.get("system_prompt_leaked", False), system_prompt_hints=data.get("system_prompt_hints", []), knowledge_results=data.get("knowledge_results", {}), identity_responses=data.get("identity_responses", {}), is_claimed_model=data.get("is_claimed_model", True), identity_mismatch_reasons=data.get("identity_mismatch_reasons", []), ) @dataclass class FullFingerprint: """Complete fingerprint combining all dimensions.""" channel_name: str = "" timestamp: str = "" performance: PerformanceFingerprint = field(default_factory=PerformanceFingerprint) language: LanguageFingerprint = field(default_factory=LanguageFingerprint) capability: CapabilityFingerprint = field(default_factory=CapabilityFingerprint) behavioral: BehavioralFingerprint = field(default_factory=BehavioralFingerprint) identity: IdentityFingerprintModel = field(default_factory=IdentityFingerprintModel) raw_responses: Dict[str, list] = field(default_factory=dict) def to_dict(self) -> dict: return { "channel_name": self.channel_name, "timestamp": self.timestamp, "performance": self.performance.to_dict(), "language": self.language.to_dict(), "capability": self.capability.to_dict(), "behavioral": self.behavioral.to_dict(), "identity": self.identity.to_dict(), "raw_responses": self.raw_responses, } @classmethod def from_dict(cls, data: dict) -> "FullFingerprint": return cls( channel_name=data.get("channel_name", ""), timestamp=data.get("timestamp", ""), performance=PerformanceFingerprint.from_dict(data.get("performance", {})), language=LanguageFingerprint.from_dict(data.get("language", {})), capability=CapabilityFingerprint.from_dict(data.get("capability", {})), behavioral=BehavioralFingerprint.from_dict(data.get("behavioral", {})), identity=IdentityFingerprintModel.from_dict(data.get("identity", {})), raw_responses=data.get("raw_responses", {}), ) @dataclass class DimensionScore: """Score for a single comparison dimension.""" dimension: str = "" score: float = 0.0 weight: float = 0.0 details: Dict = field(default_factory=dict) def to_dict(self) -> dict: return { "dimension": self.dimension, "score": self.score, "weight": self.weight, "details": self.details, } @dataclass class ComparisonResult: """Final comparison result across all dimensions.""" genuine_channel: str = "" suspect_channel: str = "" dimension_scores: List[DimensionScore] = field(default_factory=list) overall_score: float = 0.0 verdict: str = "" timestamp: str = "" def to_dict(self) -> dict: return { "genuine_channel": self.genuine_channel, "suspect_channel": self.suspect_channel, "dimension_scores": [ds.to_dict() for ds in self.dimension_scores], "overall_score": self.overall_score, "verdict": self.verdict, "timestamp": self.timestamp, }