#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Database of realistic user-behaviour data.

Holds sample user-agent strings, browser-fingerprint attributes and
time-of-day visit patterns, plus helpers that combine them into user
profiles, HTTP request headers and session records.
"""

import json
import random
import time
from datetime import datetime


class RealUserDatabase:
    """In-memory pool of browser/user attributes with random samplers.

    All sampling uses the module-level ``random`` generator, so results can
    be made reproducible by calling ``random.seed`` beforehand.
    """

    # (substring of the OS name, substring a matching user agent must
    # contain).  Checked in order; anything unmatched is treated as Linux.
    _OS_UA_MARKERS = (
        ("Windows", "Windows NT"),
        ("macOS", "Macintosh"),
        ("Android", "Android"),
        ("iOS", "iPhone"),
    )
    _LINUX_UA_MARKER = "X11; Linux"

    # (substring of the user agent, platform label), checked in order.
    _PLATFORM_MARKERS = (
        ("Windows NT", "Windows"),
        ("Macintosh", "macOS"),
        ("X11; Linux", "Linux"),
        ("Android", "Android"),
        ("iPhone", "iOS"),
    )

    def __init__(self):
        """Populate the attribute pools used by the sampling methods."""

        # User-agent strings, grouped by browser family.
        # NOTE(review): browsers on Windows 11 normally still report
        # "Windows NT 10.0"; the "NT 11.0" entries are kept from the
        # original data set -- verify whether they are wanted.
        self.user_agents = [
            # Chrome on Windows
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",

            # Chrome on Mac
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",

            # Firefox
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/120.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/121.0",

            # Safari
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.15",

            # Edge
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
            "Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0",

            # Mobile
            "Mozilla/5.0 (iPhone; CPU iPhone OS 17_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (Linux; Android 14; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",

            # Linux desktop -- required so that a "Linux" operating-system
            # pick finds a matching agent instead of falling back to the
            # whole pool and producing an inconsistent profile.
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/121.0",
        ]

        # Screen resolutions as (width, height).
        self.screen_resolutions = [
            (1920, 1080),  # most common
            (1366, 768),   # common on laptops
            (1440, 900),   # MacBook
            (1536, 864),   # high DPI
            (2560, 1440),  # 2K monitor
            (1600, 900),   # widescreen
            (1280, 720),   # HD
            (3840, 2160),  # 4K monitor
            (2560, 1600),  # 16:10
            (1920, 1200),  # 16:10
        ]

        # Operating-system distribution (weights are relative).
        self.operating_systems = [
            {"name": "Windows 10", "weight": 0.4},
            {"name": "Windows 11", "weight": 0.25},
            {"name": "macOS", "weight": 0.15},
            {"name": "Linux", "weight": 0.1},
            {"name": "Android", "weight": 0.07},
            {"name": "iOS", "weight": 0.03},
        ]

        # Browser distribution (weights are relative).
        self.browser_distribution = [
            {"name": "Chrome", "weight": 0.65},
            {"name": "Firefox", "weight": 0.15},
            {"name": "Safari", "weight": 0.12},
            {"name": "Edge", "weight": 0.06},
            {"name": "Other", "weight": 0.02},
        ]

        # Accept-Language header values.
        self.languages = [
            "zh-CN,zh;q=0.9,en;q=0.8",  # Chinese-speaking users
            "en-US,en;q=0.9",           # English-speaking users
            "en-GB,en;q=0.9,en-US;q=0.8",
            "zh-TW,zh;q=0.9,en;q=0.8",
            "ja-JP,ja;q=0.9,en;q=0.8",
            "ko-KR,ko;q=0.9,en;q=0.8",
        ]

        # IANA time-zone names.
        self.timezones = [
            "Asia/Shanghai",
            "Asia/Tokyo",
            "America/New_York",
            "Europe/London",
            "America/Los_Angeles",
            "Europe/Berlin",
            "Asia/Seoul",
            "Australia/Sydney",
        ]

        # Visit patterns keyed by time-of-day bucket.  Hour 18 belongs to no
        # bucket; get_visit_behavior() falls back to "工作时间" for it.
        self.visit_patterns = {
            "工作时间": {  # working hours
                "hours": list(range(9, 18)),
                "stay_time_multiplier": 0.8,  # shorter stays
                "scroll_frequency": 1.2,      # more frequent scrolling
            },
            "休闲时间": {  # leisure time
                "hours": list(range(19, 23)) + list(range(6, 9)),
                "stay_time_multiplier": 1.5,  # longer stays
                "scroll_frequency": 0.8,      # slower scrolling
            },
            "深夜": {  # late night
                "hours": list(range(0, 6)) + [23],
                "stay_time_multiplier": 2.0,  # very long stays
                "scroll_frequency": 0.6,      # very slow scrolling
            },
        }

    def get_random_user_profile(self):
        """Build a random user profile whose fields agree with each other.

        The operating system is drawn by weight first; the user agent is
        then drawn only from agents matching that operating system.

        Returns:
            dict with keys ``user_agent``, ``operating_system``,
            ``screen_resolution``, ``viewport_size``, ``language``,
            ``timezone``, ``color_depth``, ``platform``, ``cookie_enabled``,
            ``java_enabled``, ``hardware_concurrency``, ``device_memory``
            and ``connection_type``.
        """
        os_choice = self._weighted_choice(self.operating_systems)

        # Find the user-agent marker for the chosen OS (Linux by default).
        ua_marker = self._LINUX_UA_MARKER
        for os_fragment, marker in self._OS_UA_MARKERS:
            if os_fragment in os_choice:
                ua_marker = marker
                break

        # Fall back to the full pool if the pool was edited so that nothing
        # matches; random.choice must never see an empty list.
        ua_candidates = [ua for ua in self.user_agents if ua_marker in ua]
        if not ua_candidates:
            ua_candidates = self.user_agents
        user_agent = random.choice(ua_candidates)

        resolution = random.choice(self.screen_resolutions)
        language = random.choice(self.languages)
        timezone = random.choice(self.timezones)

        return {
            "user_agent": user_agent,
            "operating_system": os_choice,
            "screen_resolution": resolution,
            # Viewport is the screen minus a random margin in each axis.
            "viewport_size": (
                resolution[0] - random.randint(0, 100),
                resolution[1] - random.randint(100, 200),
            ),
            "language": language,
            "timezone": timezone,
            "color_depth": random.choice([24, 32]),
            "platform": self._extract_platform(user_agent),
            "cookie_enabled": True,
            "java_enabled": random.choice([True, False]),
            "hardware_concurrency": random.choice([2, 4, 8, 12, 16]),
            "device_memory": random.choice([2, 4, 8, 16, 32]),
            "connection_type": random.choice(["wifi", "ethernet", "cellular"]),
        }

    def get_realistic_headers(self, profile=None, referrer=None):
        """Build HTTP request headers for a profile.

        Args:
            profile: Profile dict as returned by get_random_user_profile();
                a fresh random profile is generated when omitted or empty.
            referrer: Optional referring URL.  When given, a ``Referer``
                header is added and ``Sec-Fetch-Site`` becomes
                ``cross-site`` instead of ``none``.

        Returns:
            dict mapping header names to string values.
        """
        if not profile:
            profile = self.get_random_user_profile()

        user_agent = profile["user_agent"]
        headers = {
            "User-Agent": user_agent,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "Accept-Language": profile["language"],
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "cross-site" if referrer else "none",
            "Sec-Fetch-User": "?1",
            "Cache-Control": "max-age=0",
            "DNT": str(random.randint(0, 1)),  # Do Not Track
        }

        if referrer:
            headers["Referer"] = referrer

        # Client-hint headers are added for any agent containing "Chrome",
        # which includes Edge.
        if "Chrome" in user_agent:
            headers["sec-ch-ua"] = self._generate_chrome_sec_ch_ua(user_agent)
            headers["sec-ch-ua-mobile"] = "?1" if "Mobile" in user_agent else "?0"
            headers["sec-ch-ua-platform"] = f'"{profile["platform"]}"'

        return headers

    def get_visit_behavior(self, current_hour=None):
        """Return visit-behaviour parameters for a time of day.

        Args:
            current_hour: Hour of day (0-23).  Defaults to the local hour
                from ``datetime.now()``.

        Returns:
            dict with ``pattern_type``, ``stay_time_multiplier``,
            ``scroll_frequency``, ``reading_speed`` (random, 200-400) and
            ``interaction_probability`` (random, 0.3-0.8).
        """
        if current_hour is None:
            current_hour = datetime.now().hour

        # Hours listed in no pattern (18 with the built-in data) use the
        # working-hours pattern.
        pattern_type = "工作时间"
        for pattern_name, pattern_data in self.visit_patterns.items():
            if current_hour in pattern_data["hours"]:
                pattern_type = pattern_name
                break

        pattern = self.visit_patterns[pattern_type]
        return {
            "pattern_type": pattern_type,
            "stay_time_multiplier": pattern["stay_time_multiplier"],
            "scroll_frequency": pattern["scroll_frequency"],
            "reading_speed": random.uniform(200, 400),  # words per minute
            "interaction_probability": random.uniform(0.3, 0.8),
        }

    def get_realistic_timing(self, base_time, behavior=None):
        """Scale a base duration by time-of-day and random variation.

        Args:
            base_time: Base duration in seconds.
            behavior: Behaviour dict providing ``stay_time_multiplier``;
                obtained from get_visit_behavior() when omitted or empty.

        Returns:
            ``base_time * multiplier * uniform(0.7, 1.5)``, never below 1.0.
        """
        if not behavior:
            behavior = self.get_visit_behavior()

        adjusted_time = base_time * behavior["stay_time_multiplier"]
        final_time = adjusted_time * random.uniform(0.7, 1.5)
        return max(final_time, 1.0)  # at least one second

    def simulate_human_delays(self, action_type="normal"):
        """Return a random delay in seconds for the given kind of action.

        Args:
            action_type: One of ``thinking``, ``reading``, ``scrolling``,
                ``clicking``, ``typing`` or ``normal``; unknown values use
                the ``normal`` range.
        """
        delays = {
            "thinking": (2, 8),
            "reading": (3, 15),
            "scrolling": (0.5, 2),
            "clicking": (0.8, 3),
            "typing": (0.1, 0.5),
            "normal": (1, 4),
        }
        min_delay, max_delay = delays.get(action_type, delays["normal"])
        return random.uniform(min_delay, max_delay)

    def _weighted_choice(self, choices):
        """Pick one ``name`` from dicts carrying ``name`` and ``weight``.

        Weights are relative and need not sum to 1.
        """
        names = [choice["name"] for choice in choices]
        weights = [choice["weight"] for choice in choices]
        return random.choices(names, weights=weights, k=1)[0]

    def _extract_platform(self, user_agent):
        """Map a user-agent string to a platform label.

        Returns ``Windows``, ``macOS``, ``Linux``, ``Android``, ``iOS`` or
        ``Unknown``.  "X11; Linux" is tested before "Android" so that
        Android agents (which contain "Linux; Android") are not reported
        as Linux.
        """
        for marker, platform in self._PLATFORM_MARKERS:
            if marker in user_agent:
                return platform
        return "Unknown"

    def _generate_chrome_sec_ch_ua(self, user_agent):
        """Build the ``sec-ch-ua`` header value for a Chromium-based agent.

        The Chromium major version comes from the ``Chrome/`` token.  Agents
        carrying an ``Edg/`` token are branded "Microsoft Edge" with the
        Edge major version; everything else is branded "Google Chrome".
        Agents without a ``Chrome/`` token get a fixed version-120 value.
        """
        if "Chrome/" not in user_agent:
            return '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"'

        chromium_major = user_agent.split("Chrome/")[1].split(".")[0]
        if "Edg/" in user_agent:
            brand = "Microsoft Edge"
            brand_major = user_agent.split("Edg/")[1].split(".")[0]
        else:
            brand = "Google Chrome"
            brand_major = chromium_major
        return (
            f'"Not_A Brand";v="8", "Chromium";v="{chromium_major}", '
            f'"{brand}";v="{brand_major}"'
        )

    def generate_session_data(self):
        """Generate a complete session record.

        Returns:
            dict with ``profile``, ``behavior``, ``session_id``
            (``session_<unix seconds>_<4-digit random>``), ``start_time``
            (ISO-8601 local time) and ``fingerprint``.
        """
        profile = self.get_random_user_profile()
        behavior = self.get_visit_behavior()

        return {
            "profile": profile,
            "behavior": behavior,
            "session_id": f"session_{int(time.time())}_{random.randint(1000, 9999)}",
            "start_time": datetime.now().isoformat(),
            "fingerprint": self._generate_browser_fingerprint(profile),
        }

    def _generate_browser_fingerprint(self, profile):
        """Derive a browser-fingerprint dict from a profile.

        ``navigator.language`` / ``navigator.languages`` hold bare language
        tags: the ``;q=`` weights of the Accept-Language value stored in
        ``profile["language"]`` are stripped.
        """
        language_tags = [
            entry.split(";")[0].strip()
            for entry in profile["language"].split(",")
        ]
        width, height = profile["screen_resolution"]

        return {
            "screen": {
                "width": width,
                "height": height,
                "colorDepth": profile["color_depth"],
                "pixelDepth": profile["color_depth"],
            },
            "navigator": {
                "userAgent": profile["user_agent"],
                "language": language_tags[0],
                "languages": language_tags,
                "platform": profile["platform"],
                "cookieEnabled": profile["cookie_enabled"],
                "javaEnabled": profile["java_enabled"],
                "hardwareConcurrency": profile["hardware_concurrency"],
                "deviceMemory": profile["device_memory"],
            },
            "timezone": profile["timezone"],
            "webgl_vendor": random.choice([
                "Google Inc. (Intel)",
                "Google Inc. (NVIDIA)",
                "Google Inc. (AMD)",
                "Apple Inc.",
            ]),
        }


def main():
    """Print a sample profile, headers, behaviour and session record."""
    db = RealUserDatabase()

    # User profile
    profile = db.get_random_user_profile()
    print("用户配置:")
    print(json.dumps(profile, indent=2, ensure_ascii=False))

    # HTTP headers
    headers = db.get_realistic_headers(profile)
    print("\nHTTP头部:")
    for key, value in headers.items():
        print(f"{key}: {value}")

    # Visit behaviour
    behavior = db.get_visit_behavior()
    print(f"\n访问行为: {behavior}")

    # Full session record
    session = db.generate_session_data()
    print("\n会话数据:")
    print(json.dumps(session, indent=2, ensure_ascii=False))


# Usage example
if __name__ == "__main__":
    main()