- 4维指纹采集: 性能/语言/能力/行为 - models.py 已加入 IdentityFingerprintModel (第5维数据模型) - comparator.py 已升级为5维评分 (含identity维度比较) - reporter.py 已加入身份验证报告输出 - main.py 已集成identity采集流程 - identity collector 待下次提交补充完整代码
279 lines
11 KiB
Python
279 lines
11 KiB
Python
"""Data models for AI API fingerprint detection."""
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Dict, List, Optional
|
|
|
|
|
|
@dataclass
class ChannelConfig:
    """Configuration for a single API channel.

    All three fields are required and are stored exactly as given; this
    class performs no validation.

    Attributes:
        base_url: URL string for the channel.
        api_key: Credential string for the channel. It is excluded from
            the generated ``__repr__`` so it does not end up in logs or
            tracebacks; it still takes part in ``__init__`` and ``__eq__``.
        model: Model name string for the channel.
    """

    base_url: str
    # repr=False: the default dataclass repr would print the secret verbatim.
    api_key: str = field(repr=False)
    model: str
|
|
|
|
|
|
@dataclass
class CollectionConfig:
    """Tunable settings for a data-collection run.

    Every field carries a default, so ``CollectionConfig()`` can be used
    without arguments. The values are only stored here; how they are
    consumed is decided by code outside this class.
    """

    # Number of repetitions (default 3).
    repeat_count: int = 3
    # Timeout value; presumably seconds -- TODO confirm against the HTTP client.
    timeout: float = 60
    # Token limit value (default 1024).
    max_tokens: int = 1024
    # Version string; presumably sent as an Anthropic API version -- verify at call site.
    anthropic_version: str = "2023-06-01"
|
|
|
|
|
|
@dataclass
class PerformanceFingerprint:
    """Performance metrics fingerprint.

    A plain container: it stores the values it is given and converts them
    to and from a dict. No statistics are computed in this class.
    """

    latencies_ms: List[float] = field(default_factory=list)
    p50_latency_ms: float = 0.0
    p95_latency_ms: float = 0.0
    p99_latency_ms: float = 0.0
    avg_ttft_ms: float = 0.0
    avg_tps: float = 0.0
    response_lengths: List[int] = field(default_factory=list)
    avg_response_length: float = 0.0

    def to_dict(self) -> dict:
        """Return the fingerprint as a plain dict keyed by field name.

        The list fields are copied, so mutating the returned dict does not
        change this instance.
        """
        return {
            "latencies_ms": list(self.latencies_ms or []),
            "p50_latency_ms": self.p50_latency_ms,
            "p95_latency_ms": self.p95_latency_ms,
            "p99_latency_ms": self.p99_latency_ms,
            "avg_ttft_ms": self.avg_ttft_ms,
            "avg_tps": self.avg_tps,
            "response_lengths": list(self.response_lengths or []),
            "avg_response_length": self.avg_response_length,
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "PerformanceFingerprint":
        """Build a fingerprint from a dict such as the one ``to_dict`` returns.

        Missing keys and keys whose value is ``None`` (e.g. JSON ``null``)
        fall back to the field default. Lists are copied so the new
        instance never shares state with ``data``.

        Args:
            data: Mapping of field names to values; ``None`` is treated as
                an empty mapping.

        Returns:
            A new ``PerformanceFingerprint``.
        """
        data = data or {}
        return cls(
            latencies_ms=list(data.get("latencies_ms") or []),
            p50_latency_ms=data.get("p50_latency_ms") or 0.0,
            p95_latency_ms=data.get("p95_latency_ms") or 0.0,
            p99_latency_ms=data.get("p99_latency_ms") or 0.0,
            avg_ttft_ms=data.get("avg_ttft_ms") or 0.0,
            avg_tps=data.get("avg_tps") or 0.0,
            response_lengths=list(data.get("response_lengths") or []),
            avg_response_length=data.get("avg_response_length") or 0.0,
        )
|
|
|
|
|
|
@dataclass
class LanguageFingerprint:
    """Language pattern fingerprint.

    A plain container: values are stored as given and converted to and
    from a dict. No text analysis happens in this class.
    """

    vocab_richness: float = 0.0
    top_bigrams: Dict[str, int] = field(default_factory=dict)
    format_features: Dict[str, float] = field(default_factory=dict)
    opening_patterns: List[str] = field(default_factory=list)
    closing_patterns: List[str] = field(default_factory=list)
    cjk_ratio: float = 0.0

    def to_dict(self) -> dict:
        """Return the fingerprint as a plain dict keyed by field name.

        Dict and list fields are copied, so mutating the returned dict does
        not change this instance.
        """
        return {
            "vocab_richness": self.vocab_richness,
            "top_bigrams": dict(self.top_bigrams or {}),
            "format_features": dict(self.format_features or {}),
            "opening_patterns": list(self.opening_patterns or []),
            "closing_patterns": list(self.closing_patterns or []),
            "cjk_ratio": self.cjk_ratio,
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "LanguageFingerprint":
        """Build a fingerprint from a dict such as the one ``to_dict`` returns.

        Missing keys and keys whose value is ``None`` (e.g. JSON ``null``)
        fall back to the field default. Containers are copied so the new
        instance never shares state with ``data``.

        Args:
            data: Mapping of field names to values; ``None`` is treated as
                an empty mapping.

        Returns:
            A new ``LanguageFingerprint``.
        """
        data = data or {}
        return cls(
            vocab_richness=data.get("vocab_richness") or 0.0,
            top_bigrams=dict(data.get("top_bigrams") or {}),
            format_features=dict(data.get("format_features") or {}),
            opening_patterns=list(data.get("opening_patterns") or []),
            closing_patterns=list(data.get("closing_patterns") or []),
            cjk_ratio=data.get("cjk_ratio") or 0.0,
        )
|
|
|
|
|
|
@dataclass
class CapabilityFingerprint:
    """Capability test fingerprint.

    A plain container of four string-keyed dicts; values are stored as
    given and converted to and from a dict.
    """

    knowledge_cutoff_responses: Dict[str, str] = field(default_factory=dict)
    math_scores: Dict[str, bool] = field(default_factory=dict)
    code_scores: Dict[str, bool] = field(default_factory=dict)
    refusal_patterns: Dict[str, bool] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the fingerprint as a plain dict keyed by field name.

        Each dict field is copied, so mutating the result does not change
        this instance.
        """
        return {
            "knowledge_cutoff_responses": dict(self.knowledge_cutoff_responses or {}),
            "math_scores": dict(self.math_scores or {}),
            "code_scores": dict(self.code_scores or {}),
            "refusal_patterns": dict(self.refusal_patterns or {}),
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "CapabilityFingerprint":
        """Build a fingerprint from a dict such as the one ``to_dict`` returns.

        Missing keys and keys whose value is ``None`` (e.g. JSON ``null``)
        become empty dicts. Dicts are copied so the new instance never
        shares state with ``data``.

        Args:
            data: Mapping of field names to values; ``None`` is treated as
                an empty mapping.

        Returns:
            A new ``CapabilityFingerprint``.
        """
        data = data or {}
        return cls(
            knowledge_cutoff_responses=dict(data.get("knowledge_cutoff_responses") or {}),
            math_scores=dict(data.get("math_scores") or {}),
            code_scores=dict(data.get("code_scores") or {}),
            refusal_patterns=dict(data.get("refusal_patterns") or {}),
        )
|
|
|
|
|
|
@dataclass
class BehavioralFingerprint:
    """Behavioral pattern fingerprint.

    A plain container: values are stored as given and converted to and
    from a dict.
    """

    consistency_scores: List[float] = field(default_factory=list)
    instruction_compliance: Dict[str, bool] = field(default_factory=dict)
    response_headers: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the fingerprint as a plain dict keyed by field name.

        The list and dict fields are copied, so mutating the result does
        not change this instance.
        """
        return {
            "consistency_scores": list(self.consistency_scores or []),
            "instruction_compliance": dict(self.instruction_compliance or {}),
            "response_headers": dict(self.response_headers or {}),
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "BehavioralFingerprint":
        """Build a fingerprint from a dict such as the one ``to_dict`` returns.

        Missing keys and keys whose value is ``None`` (e.g. JSON ``null``)
        become empty containers. Containers are copied so the new instance
        never shares state with ``data``.

        Args:
            data: Mapping of field names to values; ``None`` is treated as
                an empty mapping.

        Returns:
            A new ``BehavioralFingerprint``.
        """
        data = data or {}
        return cls(
            consistency_scores=list(data.get("consistency_scores") or []),
            instruction_compliance=dict(data.get("instruction_compliance") or {}),
            response_headers=dict(data.get("response_headers") or {}),
        )
|
|
|
|
|
|
@dataclass
class IdentityFingerprintModel:
    """Identity verification fingerprint — stored in FullFingerprint.

    This is a lightweight model for serialization; the full IdentityFingerprint
    lives in collectors/identity.py and is converted to/from this for storage.

    Values are stored as given. ``to_dict`` and ``from_dict`` make shallow
    copies of the top-level containers; nested values (for example the
    lists inside ``marker_details``) are shared, not copied.
    """

    claimed_identity: str = ""
    claimed_developer: str = ""
    identity_consistency: float = 0.0
    detected_model: str = ""
    detection_confidence: float = 0.0
    model_scores: Dict[str, float] = field(default_factory=dict)
    vocab_markers: Dict[str, int] = field(default_factory=dict)
    marker_details: Dict[str, List[str]] = field(default_factory=dict)
    signature_behaviors: Dict[str, str] = field(default_factory=dict)
    system_prompt_leaked: bool = False
    system_prompt_hints: List[str] = field(default_factory=list)
    knowledge_results: Dict[str, bool] = field(default_factory=dict)
    identity_responses: Dict[str, str] = field(default_factory=dict)
    # Defaults to True: an empty/unknown record is not flagged as a mismatch.
    is_claimed_model: bool = True
    identity_mismatch_reasons: List[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the fingerprint as a plain dict keyed by field name.

        Top-level dict and list fields are shallow-copied, so adding or
        removing entries in the result does not change this instance.
        """
        return {
            "claimed_identity": self.claimed_identity,
            "claimed_developer": self.claimed_developer,
            "identity_consistency": self.identity_consistency,
            "detected_model": self.detected_model,
            "detection_confidence": self.detection_confidence,
            "model_scores": dict(self.model_scores or {}),
            "vocab_markers": dict(self.vocab_markers or {}),
            "marker_details": dict(self.marker_details or {}),
            "signature_behaviors": dict(self.signature_behaviors or {}),
            "system_prompt_leaked": self.system_prompt_leaked,
            "system_prompt_hints": list(self.system_prompt_hints or []),
            "knowledge_results": dict(self.knowledge_results or {}),
            "identity_responses": dict(self.identity_responses or {}),
            "is_claimed_model": self.is_claimed_model,
            "identity_mismatch_reasons": list(self.identity_mismatch_reasons or []),
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "IdentityFingerprintModel":
        """Build a model from a dict such as the one ``to_dict`` returns.

        Missing keys and keys whose value is ``None`` (e.g. JSON ``null``)
        fall back to the field default. Top-level containers are
        shallow-copied so the new instance does not share them with
        ``data``.

        Args:
            data: Mapping of field names to values; ``None`` is treated as
                an empty mapping.

        Returns:
            A new ``IdentityFingerprintModel``.
        """
        data = data or {}
        # The default here is True, so ``or`` cannot be used: it would turn
        # a stored False into True. Only a missing/None value gets the default.
        claimed_flag = data.get("is_claimed_model")
        if claimed_flag is None:
            claimed_flag = True
        return cls(
            claimed_identity=data.get("claimed_identity") or "",
            claimed_developer=data.get("claimed_developer") or "",
            identity_consistency=data.get("identity_consistency") or 0.0,
            detected_model=data.get("detected_model") or "",
            detection_confidence=data.get("detection_confidence") or 0.0,
            model_scores=dict(data.get("model_scores") or {}),
            vocab_markers=dict(data.get("vocab_markers") or {}),
            marker_details=dict(data.get("marker_details") or {}),
            signature_behaviors=dict(data.get("signature_behaviors") or {}),
            system_prompt_leaked=data.get("system_prompt_leaked") or False,
            system_prompt_hints=list(data.get("system_prompt_hints") or []),
            knowledge_results=dict(data.get("knowledge_results") or {}),
            identity_responses=dict(data.get("identity_responses") or {}),
            is_claimed_model=claimed_flag,
            identity_mismatch_reasons=list(data.get("identity_mismatch_reasons") or []),
        )
|
|
|
|
|
|
@dataclass
class FullFingerprint:
    """Complete fingerprint combining all dimensions.

    Holds one instance of each per-dimension model plus the channel name,
    a timestamp string and the raw responses. Serialization delegates to
    each sub-model's own ``to_dict`` / ``from_dict``.
    """

    channel_name: str = ""
    timestamp: str = ""
    performance: PerformanceFingerprint = field(default_factory=PerformanceFingerprint)
    language: LanguageFingerprint = field(default_factory=LanguageFingerprint)
    capability: CapabilityFingerprint = field(default_factory=CapabilityFingerprint)
    behavioral: BehavioralFingerprint = field(default_factory=BehavioralFingerprint)
    identity: IdentityFingerprintModel = field(default_factory=IdentityFingerprintModel)
    raw_responses: Dict[str, list] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the whole fingerprint as a nested plain dict.

        Each dimension is serialized through its own ``to_dict``. The
        ``raw_responses`` mapping is shallow-copied, so adding or removing
        keys in the result does not change this instance.
        """
        return {
            "channel_name": self.channel_name,
            "timestamp": self.timestamp,
            "performance": self.performance.to_dict(),
            "language": self.language.to_dict(),
            "capability": self.capability.to_dict(),
            "behavioral": self.behavioral.to_dict(),
            "identity": self.identity.to_dict(),
            "raw_responses": dict(self.raw_responses or {}),
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "FullFingerprint":
        """Build a full fingerprint from a nested dict.

        A section that is missing *or* explicitly ``None`` (e.g. JSON
        ``null``) is loaded as an empty dict, so the nested ``from_dict``
        always receives a mapping. A record written without the
        ``identity`` section therefore loads with a default identity model.

        Args:
            data: Nested mapping such as the one ``to_dict`` returns;
                ``None`` is treated as an empty mapping.

        Returns:
            A new ``FullFingerprint``.
        """
        data = data or {}
        return cls(
            channel_name=data.get("channel_name") or "",
            timestamp=data.get("timestamp") or "",
            # ``or {}`` rather than a .get default: the default is ignored
            # when the key exists with a None value.
            performance=PerformanceFingerprint.from_dict(data.get("performance") or {}),
            language=LanguageFingerprint.from_dict(data.get("language") or {}),
            capability=CapabilityFingerprint.from_dict(data.get("capability") or {}),
            behavioral=BehavioralFingerprint.from_dict(data.get("behavioral") or {}),
            identity=IdentityFingerprintModel.from_dict(data.get("identity") or {}),
            raw_responses=dict(data.get("raw_responses") or {}),
        )
|
|
|
|
|
|
@dataclass
class DimensionScore:
    """Score for a single comparison dimension.

    A plain container: ``dimension`` names the dimension, ``score`` and
    ``weight`` are stored as given, and ``details`` is a free-form dict.
    """

    dimension: str = ""
    score: float = 0.0
    weight: float = 0.0
    details: Dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the score as a plain dict keyed by field name.

        ``details`` is shallow-copied, so adding or removing keys in the
        result does not change this instance.
        """
        return {
            "dimension": self.dimension,
            "score": self.score,
            "weight": self.weight,
            "details": dict(self.details or {}),
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "DimensionScore":
        """Build a score from a dict such as the one ``to_dict`` returns.

        Mirrors the ``from_dict`` constructors of the fingerprint models so
        a serialized score can be loaded back. Missing keys and ``None``
        values fall back to the field default.

        Args:
            data: Mapping of field names to values; ``None`` is treated as
                an empty mapping.

        Returns:
            A new ``DimensionScore``.
        """
        data = data or {}
        return cls(
            dimension=data.get("dimension") or "",
            score=data.get("score") or 0.0,
            weight=data.get("weight") or 0.0,
            details=dict(data.get("details") or {}),
        )
|
|
|
|
|
|
@dataclass
class ComparisonResult:
    """Final comparison result across all dimensions.

    Stores the two channel names, the per-dimension scores, the overall
    score, a verdict string and a timestamp string. Nothing is computed in
    this class.
    """

    genuine_channel: str = ""
    suspect_channel: str = ""
    dimension_scores: List[DimensionScore] = field(default_factory=list)
    overall_score: float = 0.0
    verdict: str = ""
    timestamp: str = ""

    def to_dict(self) -> dict:
        """Return the result as a plain dict.

        Each entry of ``dimension_scores`` is serialized through
        ``DimensionScore.to_dict``.
        """
        return {
            "genuine_channel": self.genuine_channel,
            "suspect_channel": self.suspect_channel,
            "dimension_scores": [ds.to_dict() for ds in self.dimension_scores],
            "overall_score": self.overall_score,
            "verdict": self.verdict,
            "timestamp": self.timestamp,
        }

    @classmethod
    def from_dict(cls, data: Optional[dict]) -> "ComparisonResult":
        """Build a result from a dict such as the one ``to_dict`` returns.

        Mirrors the ``from_dict`` constructors of the fingerprint models so
        a saved comparison can be loaded back. Missing keys and ``None``
        values fall back to the field default.

        Args:
            data: Mapping such as the one ``to_dict`` returns; ``None`` is
                treated as an empty mapping.

        Returns:
            A new ``ComparisonResult``.
        """
        data = data or {}
        # Entries are rebuilt through the DimensionScore constructor (its
        # four fields are listed explicitly) instead of relying on a helper.
        scores = [
            DimensionScore(
                dimension=entry.get("dimension") or "",
                score=entry.get("score") or 0.0,
                weight=entry.get("weight") or 0.0,
                details=dict(entry.get("details") or {}),
            )
            for entry in (data.get("dimension_scores") or [])
        ]
        return cls(
            genuine_channel=data.get("genuine_channel") or "",
            suspect_channel=data.get("suspect_channel") or "",
            dimension_scores=scores,
            overall_score=data.get("overall_score") or 0.0,
            verdict=data.get("verdict") or "",
            timestamp=data.get("timestamp") or "",
        )
|