Files
shualiangv1/real_user_database.py
huangzhenpc 39ce9404db as
2025-07-18 13:55:44 +08:00

412 lines
17 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
真实用户数据库 - 用于生成真实的用户配置和行为模式
"""
import random
import time
class RealUserDatabase:
    """Generate randomized browser/user profiles and behaviour parameters.

    The instance holds static lookup tables (user agents, screen resolutions,
    languages, visit patterns, weighted distributions) and exposes ``get_*``
    methods that sample from them with the module-level ``random`` generator.
    """

    # Device classes that are sampled from the handheld (phone/tablet) pools.
    _HANDHELD_DEVICE_TYPES = ("mobile", "tablet")

    # Substrings that identify Microsoft Edge. Edge user agents also contain
    # "Chrome" and "Safari", so these must be checked first.
    _EDGE_TOKENS = ("Edg/", "EdgA/", "EdgiOS/", "Edge/")

    def __init__(self):
        """Populate the lookup tables used by the sampling methods."""
        # Desktop user agents.
        self.desktop_user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/120.0",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1.2 Safari/605.1.15",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/120.0",
        ]
        # Handheld user agents (phones and iPads share this list; iPad
        # entries are used for the "tablet" device class).
        self.mobile_user_agents = [
            "Mozilla/5.0 (iPhone; CPU iPhone OS 17_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 15_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.6 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (Linux; Android 14; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 13; SM-A515F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 12; SM-G996B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36",
            "Mozilla/5.0 (iPad; CPU OS 17_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (iPad; CPU OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
        ]
        # Screen resolutions ("WIDTHxHEIGHT") per device class.
        self.screen_resolutions = {
            "desktop": [
                "1920x1080", "1366x768", "1536x864", "1440x900", "1280x720",
                "2560x1440", "1680x1050", "1600x900", "1280x1024", "1024x768",
            ],
            # NOTE(review): "768x1024" looks like a tablet size; it is kept
            # here so the existing mobile pool is unchanged — confirm intent.
            "mobile": [
                "375x667", "414x896", "390x844", "393x851", "412x915",
                "360x640", "375x812", "428x926", "320x568", "768x1024",
            ],
            # Portrait sizes used for the "tablet" device class.
            "tablet": [
                "768x1024", "810x1080", "820x1180", "834x1194", "1024x1366",
            ],
        }
        # Accept-Language style preference strings.
        self.languages = [
            "zh-CN,zh;q=0.9,en;q=0.8",
            "zh-CN,zh;q=0.9,en;q=0.7,ja;q=0.6",
            "zh-CN,zh;q=0.9",
            "en-US,en;q=0.9,zh;q=0.8",
            "en-US,en;q=0.9",
            "ja-JP,ja;q=0.9,zh;q=0.8,en;q=0.7",
        ]
        # Time zone names (not read by any method of this class).
        self.timezones = [
            "Asia/Shanghai", "Asia/Hong_Kong", "Asia/Taipei",
            "Asia/Tokyo", "Asia/Seoul", "America/New_York",
            "America/Los_Angeles", "Europe/London", "Europe/Paris",
        ]
        # Visit behaviour archetypes. Ranges are inclusive [min, max] pairs
        # fed to random.randint by get_visit_behavior().
        self.visit_patterns = [
            {
                "pattern_type": "casual_gamer",
                "description": "休闲游戏玩家",
                "stay_time_range": [20, 60],
                "pages_per_visit": [2, 5],
                "return_probability": 0.7,
                "ad_tolerance": 0.3,
            },
            {
                "pattern_type": "hardcore_gamer",
                "description": "硬核游戏玩家",
                "stay_time_range": [60, 180],
                "pages_per_visit": [5, 10],
                "return_probability": 0.8,
                "ad_tolerance": 0.1,
            },
            {
                "pattern_type": "quick_browser",
                "description": "快速浏览者",
                "stay_time_range": [5, 20],
                "pages_per_visit": [1, 3],
                "return_probability": 0.3,
                "ad_tolerance": 0.2,
            },
            {
                "pattern_type": "explorer",
                "description": "探索者",
                "stay_time_range": [30, 90],
                "pages_per_visit": [3, 8],
                "return_probability": 0.6,
                "ad_tolerance": 0.4,
            },
            {
                "pattern_type": "mobile_user",
                "description": "移动端用户",
                "stay_time_range": [15, 45],
                "pages_per_visit": [2, 4],
                "return_probability": 0.5,
                "ad_tolerance": 0.2,
            },
        ]
        # Weighted geographic regions, each with a representative time zone.
        # NOTE(review): "Southeast Asia" maps to Asia/Tokyo, which is not a
        # Southeast Asian zone — confirm whether this is intentional.
        self.geographic_regions = [
            {"region": "North America", "weight": 0.25, "timezone": "America/New_York"},
            {"region": "East Asia", "weight": 0.35, "timezone": "Asia/Shanghai"},
            {"region": "Europe", "weight": 0.20, "timezone": "Europe/London"},
            {"region": "Southeast Asia", "weight": 0.15, "timezone": "Asia/Tokyo"},
            {"region": "Other", "weight": 0.05, "timezone": "UTC"},
        ]
        # Device class weights.
        self.device_distribution = {
            "mobile": 0.85,   # 85% phones
            "desktop": 0.12,  # 12% desktops
            "tablet": 0.03,   # 3% tablets
        }
        # Browser weights (not read by any method of this class).
        self.browser_distribution = {
            "chrome": 0.65,
            "safari": 0.20,
            "firefox": 0.10,
            "edge": 0.05,
        }
        # Operating system weights (not read by any method of this class).
        self.os_distribution = {
            "android": 0.45,
            "ios": 0.30,
            "windows": 0.15,
            "macos": 0.08,
            "linux": 0.02,
        }

    def get_random_user_profile(self):
        """Return a randomly sampled user profile as a dict.

        The device class is drawn from ``device_distribution``; the user
        agent and screen resolution are then drawn from the pool matching
        that class, so that browser/OS fields derived from the user agent
        agree with ``device_type``.

        Returns:
            dict with keys ``user_agent``, ``device_type``,
            ``screen_resolution``, ``language``, ``timezone``, ``region``,
            ``browser``, ``os``, ``connection_type``,
            ``hardware_concurrency``, ``memory``, ``color_depth`` and
            ``pixel_ratio``.
        """
        device_type = self._weighted_choice(self.device_distribution)

        if device_type == "tablet":
            # Tablets use the iPad entries of the handheld list. Fall back to
            # the whole list if a caller replaced it with one without iPads.
            ua_pool = [ua for ua in self.mobile_user_agents if "iPad" in ua]
            ua_pool = ua_pool or self.mobile_user_agents
            resolution_pool = (
                self.screen_resolutions.get("tablet")
                or self.screen_resolutions["mobile"]
            )
        elif device_type == "mobile":
            # Phones must not report an iPad user agent.
            ua_pool = [ua for ua in self.mobile_user_agents if "iPad" not in ua]
            ua_pool = ua_pool or self.mobile_user_agents
            resolution_pool = self.screen_resolutions["mobile"]
        else:
            ua_pool = self.desktop_user_agents
            resolution_pool = self.screen_resolutions["desktop"]

        user_agent = random.choice(ua_pool)
        screen_resolution = random.choice(resolution_pool)

        # The region supplies both the region label and the time zone.
        region = self._weighted_choice_list(self.geographic_regions)

        return {
            "user_agent": user_agent,
            "device_type": device_type,
            "screen_resolution": screen_resolution,
            "language": random.choice(self.languages),
            "timezone": region["timezone"],
            "region": region["region"],
            "browser": self._extract_browser_from_ua(user_agent),
            "os": self._extract_os_from_ua(user_agent),
            "connection_type": self._generate_connection_type(device_type),
            "hardware_concurrency": self._generate_hardware_concurrency(device_type),
            "memory": self._generate_memory_info(device_type),
            "color_depth": random.choice([24, 32]),
            "pixel_ratio": self._generate_pixel_ratio(device_type),
        }

    def get_visit_behavior(self, current_hour=None):
        """Return a visit behaviour dict based on a random visit pattern.

        The result is an independent copy of one entry of
        ``visit_patterns`` (mutating it does not affect the table) with two
        extra keys: ``actual_stay_time`` and ``actual_pages``, both ints.
        ``ad_tolerance`` and ``actual_stay_time`` are scaled by time of day:
        hours 9-18 reduce both, hours 19-23 increase both, other hours leave
        them unchanged.

        Args:
            current_hour: Hour of day (0-23) used for the time-of-day
                adjustment. Defaults to the local wall-clock hour.

        Returns:
            dict describing the sampled behaviour.
        """
        pattern = random.choice(self.visit_patterns)

        # dict.copy() alone would share the range lists with the table.
        behavior = {
            key: list(value) if isinstance(value, list) else value
            for key, value in pattern.items()
        }

        stay_min, stay_max = pattern["stay_time_range"]
        pages_min, pages_max = pattern["pages_per_visit"]
        stay_time = random.randint(stay_min, stay_max)
        behavior["actual_pages"] = random.randint(pages_min, pages_max)

        if current_hour is None:
            current_hour = time.localtime().tm_hour

        if 9 <= current_hour <= 18:
            # Working hours: less ad tolerance, shorter stay.
            behavior["ad_tolerance"] *= 0.7
            stay_time *= 0.8
        elif 19 <= current_hour <= 23:
            # Evening leisure time: more ad tolerance, longer stay.
            behavior["ad_tolerance"] *= 1.2
            stay_time *= 1.1

        # Keep the stay time an integer regardless of the hour so callers
        # always receive the same type.
        behavior["actual_stay_time"] = int(round(stay_time))
        return behavior

    def get_ad_interaction_preferences(self):
        """Return randomly sampled ad-interaction preference values.

        Returns:
            dict of floats, each drawn uniformly from a fixed range.
        """
        return {
            "close_button_preference": random.uniform(0.7, 0.9),  # close-button preference
            "native_ad_tolerance": random.uniform(0.3, 0.6),      # native ad tolerance
            "video_ad_skip_rate": random.uniform(0.6, 0.8),       # video ad skip rate
            "banner_ignore_rate": random.uniform(0.8, 0.95),      # banner ad ignore rate
            "popup_close_speed": random.uniform(0.5, 3.0),        # popup close speed
            "ad_click_probability": random.uniform(0.05, 0.15),   # ad click probability
        }

    def get_realistic_timing(self):
        """Return randomly sampled timing parameters.

        Returns:
            dict of floats, each drawn uniformly from a fixed range.
        """
        return {
            "page_load_wait": random.uniform(2, 5),              # wait for page load
            "ad_recognition_time": random.uniform(0.5, 2.5),     # time to notice an ad
            "close_button_search_time": random.uniform(0.3, 2.0),  # time to find close button
            "decision_making_time": random.uniform(1, 4),        # decision time
            "click_execution_time": random.uniform(0.1, 0.8),    # time to perform the click
            "post_action_pause": random.uniform(0.5, 2.0),       # pause after an action
            "scroll_speed": random.uniform(0.5, 2.0),            # scroll speed
            "read_speed": random.uniform(200, 400),              # reading speed (chars/minute)
        }

    def _weighted_choice(self, choices_dict):
        """Pick one key of ``choices_dict`` with probability ~ its value.

        Entries with weight 0 are never returned.

        Raises:
            ValueError: if the weights do not sum to a positive number.
            IndexError: if ``choices_dict`` is empty.
        """
        keys = list(choices_dict)
        weights = [choices_dict[key] for key in keys]
        return random.choices(keys, weights=weights, k=1)[0]

    def _weighted_choice_list(self, choices_list):
        """Pick one dict of ``choices_list`` with probability ~ its "weight".

        Entries with weight 0 are never returned.

        Raises:
            ValueError: if the weights do not sum to a positive number.
            IndexError: if ``choices_list`` is empty.
        """
        weights = [item["weight"] for item in choices_list]
        return random.choices(choices_list, weights=weights, k=1)[0]

    def _extract_browser_from_ua(self, user_agent):
        """Classify a user-agent string as a browser family.

        Checks run from most to least specific because Edge user agents also
        contain "Chrome" and "Safari", and Chrome user agents also contain
        "Safari".

        Returns:
            One of "edge", "firefox", "chrome", "safari" or "unknown".
        """
        if any(token in user_agent for token in self._EDGE_TOKENS):
            return "edge"
        # FxiOS / CriOS are the product tokens of Firefox / Chrome on iOS.
        if "Firefox" in user_agent or "FxiOS" in user_agent:
            return "firefox"
        if "Chrome" in user_agent or "CriOS" in user_agent:
            return "chrome"
        if "Safari" in user_agent:
            return "safari"
        return "unknown"

    def _extract_os_from_ua(self, user_agent):
        """Classify a user-agent string as an operating system.

        Returns:
            One of "windows", "ios", "macos", "android", "linux" or
            "unknown".
        """
        if "Windows NT" in user_agent:
            return "windows"
        if "Mac OS X" in user_agent:
            # iPhone/iPad user agents contain "like Mac OS X".
            if "iPhone" in user_agent or "iPad" in user_agent:
                return "ios"
            return "macos"
        if "Linux" in user_agent:
            # Android user agents contain "Linux; Android".
            if "Android" in user_agent:
                return "android"
            return "linux"
        return "unknown"

    def _generate_connection_type(self, device_type):
        """Return a random network connection type for ``device_type``."""
        if device_type in self._HANDHELD_DEVICE_TYPES:
            return random.choice(["4g", "5g", "wifi", "3g"])
        return random.choice(["wifi", "ethernet", "cable"])

    def _generate_hardware_concurrency(self, device_type):
        """Return a random logical core count for ``device_type``."""
        if device_type in self._HANDHELD_DEVICE_TYPES:
            return random.choice([2, 4, 6, 8])
        return random.choice([4, 6, 8, 12, 16])

    def _generate_memory_info(self, device_type):
        """Return a random memory size in GB for ``device_type``."""
        if device_type in self._HANDHELD_DEVICE_TYPES:
            return random.choice([2, 3, 4, 6, 8])  # GB
        return random.choice([4, 8, 16, 32])  # GB

    def _generate_pixel_ratio(self, device_type):
        """Return a random device pixel ratio for ``device_type``."""
        if device_type in self._HANDHELD_DEVICE_TYPES:
            return random.choice([1.0, 1.5, 2.0, 2.5, 3.0])
        return random.choice([1.0, 1.25, 1.5, 2.0])

    def get_session_fingerprint(self):
        """Return a dict of randomly generated session fingerprint values.

        The values are sampled independently of any profile returned by
        ``get_random_user_profile``. ``battery_level`` is an int in
        [20, 100] about 80% of the time and ``None`` otherwise.
        """
        return {
            "canvas_fingerprint": self._generate_canvas_fingerprint(),
            "webgl_fingerprint": self._generate_webgl_fingerprint(),
            "audio_fingerprint": self._generate_audio_fingerprint(),
            "font_fingerprint": self._generate_font_fingerprint(),
            # Whole-hour offset expressed in minutes.
            "timezone_offset": random.randint(-12, 12) * 60,
            "touch_support": random.choice([True, False]),
            "webrtc_support": random.choice([True, False]),
            "battery_level": random.randint(20, 100) if random.random() < 0.8 else None,
        }

    def _generate_canvas_fingerprint(self):
        """Return "canvas_" followed by a random six-digit number."""
        return "canvas_" + str(random.randint(100000, 999999))

    def _generate_webgl_fingerprint(self):
        """Return "webgl_" followed by a random six-digit number."""
        return "webgl_" + str(random.randint(100000, 999999))

    def _generate_audio_fingerprint(self):
        """Return "audio_" followed by a random six-digit number."""
        return "audio_" + str(random.randint(100000, 999999))

    def _generate_font_fingerprint(self):
        """Return a comma-joined random subset (5-9 names) of common fonts."""
        common_fonts = [
            "Arial", "Times New Roman", "Helvetica", "Georgia", "Verdana",
            "Trebuchet MS", "Comic Sans MS", "Impact", "Courier New",
        ]
        available_fonts = random.sample(common_fonts, random.randint(5, 9))
        return ",".join(available_fonts)

    def get_behavioral_patterns(self):
        """Return randomly sampled mouse, scroll, click and keyboard values.

        Returns:
            dict with the keys ``mouse_movement``, ``scroll_behavior``,
            ``click_patterns`` and ``keyboard_behavior``, each a dict of
            sampled values.
        """
        return {
            "mouse_movement": {
                "speed": random.uniform(0.5, 2.0),
                "smoothness": random.uniform(0.6, 1.0),
                "pause_frequency": random.uniform(0.1, 0.4),
            },
            "scroll_behavior": {
                "speed": random.uniform(0.3, 1.5),
                "direction_changes": random.randint(2, 8),
                "pause_at_content": random.choice([True, False]),
            },
            "click_patterns": {
                "double_click_speed": random.uniform(200, 500),  # ms
                "click_precision": random.uniform(0.8, 1.0),
                "accidental_clicks": random.uniform(0.01, 0.05),
            },
            "keyboard_behavior": {
                "typing_speed": random.uniform(30, 80),  # WPM
                "error_rate": random.uniform(0.02, 0.08),
                "pause_between_words": random.uniform(0.1, 0.5),
            },
        }

    def get_attention_patterns(self):
        """Return randomly sampled attention-related parameters.

        Returns:
            dict of floats, each drawn uniformly from a fixed range.
        """
        return {
            "attention_span": random.uniform(30, 300),           # attention span (seconds)
            "distraction_probability": random.uniform(0.1, 0.3),  # chance of distraction
            "focus_recovery_time": random.uniform(2, 10),        # time to refocus
            "multitasking_tendency": random.uniform(0.0, 0.5),   # multitasking tendency
            "content_engagement": random.uniform(0.3, 0.9),      # content engagement
        }

    def simulate_realistic_delays(self):
        """Return randomly sampled delay values in milliseconds.

        This only returns numbers; it does not sleep.
        """
        return {
            "network_latency": random.uniform(50, 200),       # network latency (ms)
            "processing_delay": random.uniform(100, 500),     # processing delay (ms)
            "render_delay": random.uniform(50, 150),          # render delay (ms)
            "user_reaction_delay": random.uniform(200, 800),  # user reaction delay (ms)
        }
# Usage example: print one sample of each generator when run as a script.
if __name__ == "__main__":
    db = RealUserDatabase()

    # Random user profile (values are of mixed types, printed as-is).
    profile = db.get_random_user_profile()
    print("用户配置:")
    for field in profile:
        print(f" {field}: {profile[field]}")

    # Visit behaviour (values are of mixed types, printed as-is).
    print("\n访问行为:")
    behavior = db.get_visit_behavior()
    for field in behavior:
        print(f" {field}: {behavior[field]}")

    # Ad-interaction preferences (all floats, two decimal places).
    print("\n广告交互偏好:")
    ad_prefs = db.get_ad_interaction_preferences()
    for field in ad_prefs:
        print(f" {field}: {ad_prefs[field]:.2f}")

    # Timing parameters (all floats, two decimal places).
    print("\n真实时间模式:")
    timing = db.get_realistic_timing()
    for field in timing:
        print(f" {field}: {timing[field]:.2f}")