#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Real user database - generates realistic user profiles and behavior patterns.
"""

import copy
import random
import time


class RealUserDatabase:
    """In-memory catalogue of browser/device data used to build random profiles.

    All data lives in plain instance attributes (lists and dicts) populated in
    ``__init__``; the ``get_*`` methods draw random values from them using the
    module-level ``random`` generator.
    """

    # Device types that share the handheld (phone-like) hardware and network
    # characteristics. Everything else is treated as a desktop.
    _HANDHELD_DEVICE_TYPES = ("mobile", "tablet")

    def __init__(self):
        """Populate the user-agent, resolution, locale and distribution tables."""
        # Desktop user agents
        self.desktop_user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/120.0",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1.2 Safari/605.1.15",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/120.0",
        ]

        # Mobile user agents (phones and iPads)
        self.mobile_user_agents = [
            "Mozilla/5.0 (iPhone; CPU iPhone OS 17_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 15_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.6 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (Linux; Android 14; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 13; SM-A515F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 12; SM-G996B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36",
            "Mozilla/5.0 (iPad; CPU OS 17_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (iPad; CPU OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
        ]

        # Screen resolutions per device class
        self.screen_resolutions = {
            "desktop": [
                "1920x1080", "1366x768", "1536x864", "1440x900", "1280x720",
                "2560x1440", "1680x1050", "1600x900", "1280x1024", "1024x768",
            ],
            "mobile": [
                "375x667", "414x896", "390x844", "393x851", "412x915",
                "360x640", "375x812", "428x926", "320x568", "768x1024",
            ],
            # Used when the device distribution yields "tablet"; the tablet
            # user agents in this file are all iPads.
            "tablet": [
                "768x1024", "810x1080", "820x1180", "834x1194", "1024x1366",
            ],
        }

        # Accept-Language preferences
        self.languages = [
            "zh-CN,zh;q=0.9,en;q=0.8",
            "zh-CN,zh;q=0.9,en;q=0.7,ja;q=0.6",
            "zh-CN,zh;q=0.9",
            "en-US,en;q=0.9,zh;q=0.8",
            "en-US,en;q=0.9",
            "ja-JP,ja;q=0.9,zh;q=0.8,en;q=0.7",
        ]

        # Time zones
        self.timezones = [
            "Asia/Shanghai", "Asia/Hong_Kong", "Asia/Taipei", "Asia/Tokyo",
            "Asia/Seoul", "America/New_York", "America/Los_Angeles",
            "Europe/London", "Europe/Paris",
        ]

        # Visit behavior patterns
        self.visit_patterns = [
            {
                "pattern_type": "casual_gamer",
                "description": "休闲游戏玩家",
                "stay_time_range": [20, 60],
                "pages_per_visit": [2, 5],
                "return_probability": 0.7,
                "ad_tolerance": 0.3,
            },
            {
                "pattern_type": "hardcore_gamer",
                "description": "硬核游戏玩家",
                "stay_time_range": [60, 180],
                "pages_per_visit": [5, 10],
                "return_probability": 0.8,
                "ad_tolerance": 0.1,
            },
            {
                "pattern_type": "quick_browser",
                "description": "快速浏览者",
                "stay_time_range": [5, 20],
                "pages_per_visit": [1, 3],
                "return_probability": 0.3,
                "ad_tolerance": 0.2,
            },
            {
                "pattern_type": "explorer",
                "description": "探索者",
                "stay_time_range": [30, 90],
                "pages_per_visit": [3, 8],
                "return_probability": 0.6,
                "ad_tolerance": 0.4,
            },
            {
                "pattern_type": "mobile_user",
                "description": "移动端用户",
                "stay_time_range": [15, 45],
                "pages_per_visit": [2, 4],
                "return_probability": 0.5,
                "ad_tolerance": 0.2,
            },
        ]

        # Geographic distribution (weights sum to 1.0)
        # NOTE(review): "Southeast Asia" is mapped to Asia/Tokyo, which is not
        # a Southeast Asian time zone - confirm the intended value.
        self.geographic_regions = [
            {"region": "North America", "weight": 0.25, "timezone": "America/New_York"},
            {"region": "East Asia", "weight": 0.35, "timezone": "Asia/Shanghai"},
            {"region": "Europe", "weight": 0.20, "timezone": "Europe/London"},
            {"region": "Southeast Asia", "weight": 0.15, "timezone": "Asia/Tokyo"},
            {"region": "Other", "weight": 0.05, "timezone": "UTC"},
        ]

        # Device type distribution
        self.device_distribution = {
            "mobile": 0.85,   # 85% mobile devices
            "desktop": 0.12,  # 12% desktop devices
            "tablet": 0.03,   # 3% tablet devices
        }

        # Browser distribution
        self.browser_distribution = {
            "chrome": 0.65,
            "safari": 0.20,
            "firefox": 0.10,
            "edge": 0.05,
        }

        # Operating system distribution
        self.os_distribution = {
            "android": 0.45,
            "ios": 0.30,
            "windows": 0.15,
            "macos": 0.08,
            "linux": 0.02,
        }

    def get_random_user_profile(self):
        """Build a random user profile.

        The device type is drawn from ``device_distribution``; the user agent,
        screen resolution and hardware values are then chosen to match that
        device type, so a "tablet" profile gets a tablet user agent rather
        than a desktop one.

        Returns:
            dict with the keys ``user_agent``, ``device_type``,
            ``screen_resolution``, ``language``, ``timezone``, ``region``,
            ``browser``, ``os``, ``connection_type``,
            ``hardware_concurrency``, ``memory``, ``color_depth`` and
            ``pixel_ratio``.
        """
        device_type = self._weighted_choice(self.device_distribution)
        user_agent = random.choice(self._user_agents_for(device_type))
        screen_resolution = random.choice(self._resolutions_for(device_type))
        region = self._weighted_choice_list(self.geographic_regions)

        return {
            "user_agent": user_agent,
            "device_type": device_type,
            "screen_resolution": screen_resolution,
            "language": random.choice(self.languages),
            "timezone": region["timezone"],
            "region": region["region"],
            "browser": self._extract_browser_from_ua(user_agent),
            "os": self._extract_os_from_ua(user_agent),
            "connection_type": self._generate_connection_type(device_type),
            "hardware_concurrency": self._generate_hardware_concurrency(device_type),
            "memory": self._generate_memory_info(device_type),
            "color_depth": random.choice([24, 32]),
            "pixel_ratio": self._generate_pixel_ratio(device_type),
        }

    def _user_agents_for(self, device_type):
        """Return the candidate user agents for *device_type*.

        Tablets use the iPad entries of ``mobile_user_agents`` and phones use
        the remaining entries. If that filter leaves nothing (the list was
        customised), the whole mobile list is used instead.
        """
        if device_type == "tablet":
            tablets = [ua for ua in self.mobile_user_agents if "iPad" in ua]
            return tablets or self.mobile_user_agents
        if device_type == "mobile":
            phones = [ua for ua in self.mobile_user_agents if "iPad" not in ua]
            return phones or self.mobile_user_agents
        return self.desktop_user_agents

    def _resolutions_for(self, device_type):
        """Return the candidate screen resolutions for *device_type*."""
        if device_type == "tablet":
            # Fall back to the mobile list when no tablet list is configured.
            return (self.screen_resolutions.get("tablet")
                    or self.screen_resolutions["mobile"])
        if device_type == "mobile":
            return self.screen_resolutions["mobile"]
        return self.screen_resolutions["desktop"]

    def get_visit_behavior(self, current_hour=None):
        """Pick a visit pattern and derive concrete values for one visit.

        Args:
            current_hour: hour of day (0-23) used for the time-of-day
                adjustment. Defaults to the local hour from
                ``time.localtime()``.

        Returns:
            An independent copy of the chosen pattern with two extra keys,
            ``actual_stay_time`` and ``actual_pages`` (both ints).
            ``ad_tolerance`` is scaled by the time of day and kept within
            [0, 1].
        """
        pattern = random.choice(self.visit_patterns)

        # Deep copy so callers can mutate the result (including the range
        # lists) without altering the shared pattern table.
        behavior = copy.deepcopy(pattern)
        stay_low, stay_high = pattern["stay_time_range"]
        pages_low, pages_high = pattern["pages_per_visit"]
        behavior["actual_stay_time"] = random.randint(stay_low, stay_high)
        behavior["actual_pages"] = random.randint(pages_low, pages_high)

        if current_hour is None:
            current_hour = time.localtime().tm_hour

        # Time-of-day adjustment
        if 9 <= current_hour <= 18:
            # Working hours: less tolerant of ads, shorter stay
            tolerance_factor, stay_factor = 0.7, 0.8
        elif 19 <= current_hour <= 23:
            # Leisure hours: more tolerant of ads, longer stay
            tolerance_factor, stay_factor = 1.2, 1.1
        else:
            tolerance_factor, stay_factor = None, None

        if tolerance_factor is not None:
            scaled_tolerance = behavior["ad_tolerance"] * tolerance_factor
            behavior["ad_tolerance"] = min(1.0, max(0.0, scaled_tolerance))
            # Round so the stay time is an int at every hour of the day.
            behavior["actual_stay_time"] = round(
                behavior["actual_stay_time"] * stay_factor
            )

        return behavior

    def get_ad_interaction_preferences(self):
        """Return random ad-interaction preferences as a dict of floats."""
        return {
            "close_button_preference": random.uniform(0.7, 0.9),  # preference for the close button
            "native_ad_tolerance": random.uniform(0.3, 0.6),      # tolerance of native ads
            "video_ad_skip_rate": random.uniform(0.6, 0.8),       # video ad skip rate
            "banner_ignore_rate": random.uniform(0.8, 0.95),      # banner ad ignore rate
            "popup_close_speed": random.uniform(0.5, 3.0),        # popup close speed
            "ad_click_probability": random.uniform(0.05, 0.15),   # ad click probability
        }

    def get_realistic_timing(self):
        """Return random timing values as a dict of floats."""
        return {
            "page_load_wait": random.uniform(2, 5),               # wait for page load
            "ad_recognition_time": random.uniform(0.5, 2.5),      # time to recognise an ad
            "close_button_search_time": random.uniform(0.3, 2.0), # time to find the close button
            "decision_making_time": random.uniform(1, 4),         # decision time
            "click_execution_time": random.uniform(0.1, 0.8),     # time to perform the click
            "post_action_pause": random.uniform(0.5, 2.0),        # pause after an action
            "scroll_speed": random.uniform(0.5, 2.0),             # scroll speed
            "read_speed": random.uniform(200, 400),               # reading speed (characters/minute)
        }

    @staticmethod
    def _pick_weighted(weighted_items):
        """Return one item from ``[(item, weight), ...]`` with weighted odds.

        Walks the cumulative weights until they reach a uniformly drawn
        threshold; the last item is returned if rounding leaves the threshold
        unreached. Raises IndexError when *weighted_items* is empty.
        """
        total = sum(weight for _, weight in weighted_items)
        threshold = random.uniform(0, total)
        cumulative = 0
        for item, weight in weighted_items:
            cumulative += weight
            if cumulative >= threshold:
                return item
        return weighted_items[-1][0]

    def _weighted_choice(self, choices_dict):
        """Pick a key of *choices_dict*, whose values are the weights."""
        return self._pick_weighted(list(choices_dict.items()))

    def _weighted_choice_list(self, choices_list):
        """Pick a dict from *choices_list* using each dict's ``"weight"``."""
        return self._pick_weighted(
            [(item, item["weight"]) for item in choices_list]
        )

    def _extract_browser_from_ua(self, user_agent):
        """Classify *user_agent* as edge, firefox, chrome, safari or unknown.

        Order matters: Edge user agents also contain "Chrome" and "Safari",
        and Chrome/Firefox user agents also contain "Safari", so the most
        specific token is tested first.
        """
        if "Edg" in user_agent:  # matches Edg/, EdgA/, EdgiOS/ and legacy Edge/
            return "edge"
        if "Firefox" in user_agent or "FxiOS" in user_agent:
            return "firefox"
        if "Chrome" in user_agent or "CriOS" in user_agent:
            return "chrome"
        if "Safari" in user_agent:
            return "safari"
        return "unknown"

    def _extract_os_from_ua(self, user_agent):
        """Classify *user_agent* as windows, ios, macos, android, linux or unknown."""
        if "Windows NT" in user_agent:
            return "windows"
        if "Mac OS X" in user_agent:
            # iPhone/iPad user agents say "like Mac OS X".
            if "iPhone" in user_agent or "iPad" in user_agent:
                return "ios"
            return "macos"
        if "Linux" in user_agent:
            # Android user agents also contain "Linux".
            if "Android" in user_agent:
                return "android"
            return "linux"
        return "unknown"

    def _generate_connection_type(self, device_type):
        """Return a random connection type suited to *device_type*."""
        if device_type in self._HANDHELD_DEVICE_TYPES:
            return random.choice(["4g", "5g", "wifi", "3g"])
        return random.choice(["wifi", "ethernet", "cable"])

    def _generate_hardware_concurrency(self, device_type):
        """Return a random logical core count suited to *device_type*."""
        if device_type in self._HANDHELD_DEVICE_TYPES:
            return random.choice([2, 4, 6, 8])
        return random.choice([4, 6, 8, 12, 16])

    def _generate_memory_info(self, device_type):
        """Return a random memory size in GB suited to *device_type*."""
        if device_type in self._HANDHELD_DEVICE_TYPES:
            return random.choice([2, 3, 4, 6, 8])  # GB
        return random.choice([4, 8, 16, 32])  # GB

    def _generate_pixel_ratio(self, device_type):
        """Return a random device pixel ratio suited to *device_type*."""
        if device_type in self._HANDHELD_DEVICE_TYPES:
            return random.choice([1.0, 1.5, 2.0, 2.5, 3.0])
        return random.choice([1.0, 1.25, 1.5, 2.0])

    def get_session_fingerprint(self):
        """Return a random session fingerprint dict.

        ``battery_level`` is an int in 20-100, or None in roughly 20% of
        calls. The values are drawn independently of any user profile.
        """
        return {
            "canvas_fingerprint": self._generate_canvas_fingerprint(),
            "webgl_fingerprint": self._generate_webgl_fingerprint(),
            "audio_fingerprint": self._generate_audio_fingerprint(),
            "font_fingerprint": self._generate_font_fingerprint(),
            "timezone_offset": random.randint(-12, 12) * 60,
            "touch_support": random.choice([True, False]),
            "webrtc_support": random.choice([True, False]),
            "battery_level": random.randint(20, 100) if random.random() < 0.8 else None,
        }

    def _generate_canvas_fingerprint(self):
        """Return ``"canvas_"`` followed by a random six-digit number."""
        return "canvas_" + str(random.randint(100000, 999999))

    def _generate_webgl_fingerprint(self):
        """Return ``"webgl_"`` followed by a random six-digit number."""
        return "webgl_" + str(random.randint(100000, 999999))

    def _generate_audio_fingerprint(self):
        """Return ``"audio_"`` followed by a random six-digit number."""
        return "audio_" + str(random.randint(100000, 999999))

    def _generate_font_fingerprint(self):
        """Return a comma-joined random subset (5-9) of common font names."""
        common_fonts = [
            "Arial", "Times New Roman", "Helvetica", "Georgia", "Verdana",
            "Trebuchet MS", "Comic Sans MS", "Impact", "Courier New",
        ]
        available_fonts = random.sample(common_fonts, random.randint(5, 9))
        return ",".join(available_fonts)

    def get_behavioral_patterns(self):
        """Return random mouse, scroll, click and keyboard behavior values."""
        return {
            "mouse_movement": {
                "speed": random.uniform(0.5, 2.0),
                "smoothness": random.uniform(0.6, 1.0),
                "pause_frequency": random.uniform(0.1, 0.4),
            },
            "scroll_behavior": {
                "speed": random.uniform(0.3, 1.5),
                "direction_changes": random.randint(2, 8),
                "pause_at_content": random.choice([True, False]),
            },
            "click_patterns": {
                "double_click_speed": random.uniform(200, 500),  # ms
                "click_precision": random.uniform(0.8, 1.0),
                "accidental_clicks": random.uniform(0.01, 0.05),
            },
            "keyboard_behavior": {
                "typing_speed": random.uniform(30, 80),  # WPM
                "error_rate": random.uniform(0.02, 0.08),
                "pause_between_words": random.uniform(0.1, 0.5),
            },
        }

    def get_attention_patterns(self):
        """Return random attention-related values as a dict of floats."""
        return {
            "attention_span": random.uniform(30, 300),           # attention span (seconds)
            "distraction_probability": random.uniform(0.1, 0.3), # probability of distraction
            "focus_recovery_time": random.uniform(2, 10),        # time to refocus
            "multitasking_tendency": random.uniform(0.0, 0.5),   # multitasking tendency
            "content_engagement": random.uniform(0.3, 0.9),      # content engagement
        }

    def simulate_realistic_delays(self):
        """Return random delay values in milliseconds.

        Despite the name, this only returns the numbers; it does not sleep.
        """
        return {
            "network_latency": random.uniform(50, 200),      # network latency (ms)
            "processing_delay": random.uniform(100, 500),    # processing delay (ms)
            "render_delay": random.uniform(50, 150),         # render delay (ms)
            "user_reaction_delay": random.uniform(200, 800), # user reaction delay (ms)
        }


def main():
    """Print one sample of each generated data set (usage example)."""
    db = RealUserDatabase()

    # Random user profile
    profile = db.get_random_user_profile()
    print("用户配置:")
    for key, value in profile.items():
        print(f" {key}: {value}")

    print("\n访问行为:")
    behavior = db.get_visit_behavior()
    for key, value in behavior.items():
        print(f" {key}: {value}")

    print("\n广告交互偏好:")
    ad_prefs = db.get_ad_interaction_preferences()
    for key, value in ad_prefs.items():
        print(f" {key}: {value:.2f}")

    print("\n真实时间模式:")
    timing = db.get_realistic_timing()
    for key, value in timing.items():
        print(f" {key}: {value:.2f}秒")


# Usage example
if __name__ == "__main__":
    main()