as
This commit is contained in:
@@ -1,360 +1,412 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
真实用户行为数据库
|
||||
包含真实的用户代理、浏览器指纹、访问模式等数据
|
||||
真实用户数据库 - 用于生成真实的用户配置和行为模式
|
||||
"""
|
||||
|
||||
import random
|
||||
import json
|
||||
from datetime import datetime
|
||||
import time
|
||||
|
||||
class RealUserDatabase:
|
||||
def __init__(self):
|
||||
"""初始化真实用户数据库"""
|
||||
|
||||
# 真实的用户代理字符串(从真实浏览器收集)
|
||||
self.user_agents = [
|
||||
# Chrome Windows 用户代理
|
||||
# 真实的桌面端用户代理
|
||||
self.desktop_user_agents = [
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
|
||||
# Chrome Mac 用户代理
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 13_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
|
||||
# Firefox 用户代理
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/120.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/121.0",
|
||||
|
||||
# Safari 用户代理
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.15",
|
||||
|
||||
# Edge 用户代理
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
|
||||
"Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0",
|
||||
|
||||
# 移动端用户代理
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1.2 Safari/605.1.15",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/120.0",
|
||||
]
|
||||
|
||||
# 真实的移动端用户代理
|
||||
self.mobile_user_agents = [
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1",
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 15_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.6 Mobile/15E148 Safari/604.1",
|
||||
"Mozilla/5.0 (Linux; Android 14; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 13; SM-A515F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 12; SM-G996B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (iPad; CPU OS 17_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1",
|
||||
"Mozilla/5.0 (iPad; CPU OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
|
||||
]
|
||||
|
||||
# 真实的屏幕分辨率分布
|
||||
self.screen_resolutions = [
|
||||
(1920, 1080), # 最常见
|
||||
(1366, 768), # 笔记本常见
|
||||
(1440, 900), # MacBook
|
||||
(1536, 864), # 高DPI
|
||||
(2560, 1440), # 2K显示器
|
||||
(1600, 900), # 宽屏
|
||||
(1280, 720), # HD
|
||||
(3840, 2160), # 4K显示器
|
||||
(2560, 1600), # 16:10
|
||||
(1920, 1200), # 16:10
|
||||
]
|
||||
# 屏幕分辨率
|
||||
self.screen_resolutions = {
|
||||
"desktop": [
|
||||
"1920x1080", "1366x768", "1536x864", "1440x900", "1280x720",
|
||||
"2560x1440", "1680x1050", "1600x900", "1280x1024", "1024x768"
|
||||
],
|
||||
"mobile": [
|
||||
"375x667", "414x896", "390x844", "393x851", "412x915",
|
||||
"360x640", "375x812", "428x926", "320x568", "768x1024"
|
||||
]
|
||||
}
|
||||
|
||||
# 真实的操作系统分布
|
||||
self.operating_systems = [
|
||||
{"name": "Windows 10", "weight": 0.4},
|
||||
{"name": "Windows 11", "weight": 0.25},
|
||||
{"name": "macOS", "weight": 0.15},
|
||||
{"name": "Linux", "weight": 0.1},
|
||||
{"name": "Android", "weight": 0.07},
|
||||
{"name": "iOS", "weight": 0.03},
|
||||
]
|
||||
|
||||
# 真实的浏览器分布
|
||||
self.browser_distribution = [
|
||||
{"name": "Chrome", "weight": 0.65},
|
||||
{"name": "Firefox", "weight": 0.15},
|
||||
{"name": "Safari", "weight": 0.12},
|
||||
{"name": "Edge", "weight": 0.06},
|
||||
{"name": "Other", "weight": 0.02},
|
||||
]
|
||||
|
||||
# 真实的语言设置
|
||||
# 语言偏好
|
||||
self.languages = [
|
||||
"zh-CN,zh;q=0.9,en;q=0.8", # 中文用户
|
||||
"en-US,en;q=0.9", # 英文用户
|
||||
"en-GB,en;q=0.9,en-US;q=0.8",
|
||||
"zh-TW,zh;q=0.9,en;q=0.8",
|
||||
"ja-JP,ja;q=0.9,en;q=0.8",
|
||||
"ko-KR,ko;q=0.9,en;q=0.8",
|
||||
"zh-CN,zh;q=0.9,en;q=0.8",
|
||||
"zh-CN,zh;q=0.9,en;q=0.7,ja;q=0.6",
|
||||
"zh-CN,zh;q=0.9",
|
||||
"en-US,en;q=0.9,zh;q=0.8",
|
||||
"en-US,en;q=0.9",
|
||||
"ja-JP,ja;q=0.9,zh;q=0.8,en;q=0.7"
|
||||
]
|
||||
|
||||
# 真实的时区分布
|
||||
# 时区
|
||||
self.timezones = [
|
||||
"Asia/Shanghai",
|
||||
"Asia/Tokyo",
|
||||
"America/New_York",
|
||||
"Europe/London",
|
||||
"America/Los_Angeles",
|
||||
"Europe/Berlin",
|
||||
"Asia/Seoul",
|
||||
"Australia/Sydney",
|
||||
"Asia/Shanghai", "Asia/Hong_Kong", "Asia/Taipei",
|
||||
"Asia/Tokyo", "Asia/Seoul", "America/New_York",
|
||||
"America/Los_Angeles", "Europe/London", "Europe/Paris"
|
||||
]
|
||||
|
||||
# 访问模式数据
|
||||
self.visit_patterns = {
|
||||
"工作时间": {
|
||||
"hours": list(range(9, 18)),
|
||||
"stay_time_multiplier": 0.8, # 工作时间停留时间较短
|
||||
"scroll_frequency": 1.2, # 滚动更频繁
|
||||
# 访问行为模式
|
||||
self.visit_patterns = [
|
||||
{
|
||||
"pattern_type": "casual_gamer",
|
||||
"description": "休闲游戏玩家",
|
||||
"stay_time_range": [20, 60],
|
||||
"pages_per_visit": [2, 5],
|
||||
"return_probability": 0.7,
|
||||
"ad_tolerance": 0.3
|
||||
},
|
||||
"休闲时间": {
|
||||
"hours": list(range(19, 23)) + list(range(6, 9)),
|
||||
"stay_time_multiplier": 1.5, # 休闲时间停留更久
|
||||
"scroll_frequency": 0.8, # 滚动较慢
|
||||
{
|
||||
"pattern_type": "hardcore_gamer",
|
||||
"description": "硬核游戏玩家",
|
||||
"stay_time_range": [60, 180],
|
||||
"pages_per_visit": [5, 10],
|
||||
"return_probability": 0.8,
|
||||
"ad_tolerance": 0.1
|
||||
},
|
||||
"深夜": {
|
||||
"hours": list(range(0, 6)) + [23],
|
||||
"stay_time_multiplier": 2.0, # 深夜停留很久
|
||||
"scroll_frequency": 0.6, # 滚动很慢
|
||||
{
|
||||
"pattern_type": "quick_browser",
|
||||
"description": "快速浏览者",
|
||||
"stay_time_range": [5, 20],
|
||||
"pages_per_visit": [1, 3],
|
||||
"return_probability": 0.3,
|
||||
"ad_tolerance": 0.2
|
||||
},
|
||||
{
|
||||
"pattern_type": "explorer",
|
||||
"description": "探索者",
|
||||
"stay_time_range": [30, 90],
|
||||
"pages_per_visit": [3, 8],
|
||||
"return_probability": 0.6,
|
||||
"ad_tolerance": 0.4
|
||||
},
|
||||
{
|
||||
"pattern_type": "mobile_user",
|
||||
"description": "移动端用户",
|
||||
"stay_time_range": [15, 45],
|
||||
"pages_per_visit": [2, 4],
|
||||
"return_probability": 0.5,
|
||||
"ad_tolerance": 0.2
|
||||
}
|
||||
]
|
||||
|
||||
# 地理位置分布
|
||||
self.geographic_regions = [
|
||||
{"region": "North America", "weight": 0.25, "timezone": "America/New_York"},
|
||||
{"region": "East Asia", "weight": 0.35, "timezone": "Asia/Shanghai"},
|
||||
{"region": "Europe", "weight": 0.20, "timezone": "Europe/London"},
|
||||
{"region": "Southeast Asia", "weight": 0.15, "timezone": "Asia/Tokyo"},
|
||||
{"region": "Other", "weight": 0.05, "timezone": "UTC"}
|
||||
]
|
||||
|
||||
# 设备类型分布
|
||||
self.device_distribution = {
|
||||
"mobile": 0.85, # 85%移动设备
|
||||
"desktop": 0.12, # 12%桌面设备
|
||||
"tablet": 0.03 # 3%平板设备
|
||||
}
|
||||
|
||||
# 浏览器分布
|
||||
self.browser_distribution = {
|
||||
"chrome": 0.65,
|
||||
"safari": 0.20,
|
||||
"firefox": 0.10,
|
||||
"edge": 0.05
|
||||
}
|
||||
|
||||
# 操作系统分布
|
||||
self.os_distribution = {
|
||||
"android": 0.45,
|
||||
"ios": 0.30,
|
||||
"windows": 0.15,
|
||||
"macos": 0.08,
|
||||
"linux": 0.02
|
||||
}
|
||||
|
||||
def get_random_user_profile(self):
|
||||
"""生成一个真实的用户配置文件"""
|
||||
"""生成随机的用户配置"""
|
||||
|
||||
# 选择操作系统
|
||||
os_choice = self._weighted_choice(self.operating_systems)
|
||||
# 决定设备类型
|
||||
device_type = self._weighted_choice(self.device_distribution)
|
||||
|
||||
# 根据操作系统选择合适的用户代理
|
||||
if "Windows" in os_choice:
|
||||
ua_candidates = [ua for ua in self.user_agents if "Windows NT" in ua]
|
||||
elif "macOS" in os_choice:
|
||||
ua_candidates = [ua for ua in self.user_agents if "Macintosh" in ua]
|
||||
elif "Android" in os_choice:
|
||||
ua_candidates = [ua for ua in self.user_agents if "Android" in ua]
|
||||
elif "iOS" in os_choice:
|
||||
ua_candidates = [ua for ua in self.user_agents if "iPhone" in ua]
|
||||
# 根据设备类型选择用户代理
|
||||
if device_type == "mobile":
|
||||
user_agent = random.choice(self.mobile_user_agents)
|
||||
screen_resolution = random.choice(self.screen_resolutions["mobile"])
|
||||
else:
|
||||
ua_candidates = [ua for ua in self.user_agents if "X11; Linux" in ua]
|
||||
user_agent = random.choice(self.desktop_user_agents)
|
||||
screen_resolution = random.choice(self.screen_resolutions["desktop"])
|
||||
|
||||
if not ua_candidates:
|
||||
ua_candidates = self.user_agents
|
||||
# 选择地理区域
|
||||
region = self._weighted_choice_list(self.geographic_regions)
|
||||
|
||||
user_agent = random.choice(ua_candidates)
|
||||
|
||||
# 选择屏幕分辨率
|
||||
resolution = random.choice(self.screen_resolutions)
|
||||
|
||||
# 选择语言
|
||||
language = random.choice(self.languages)
|
||||
|
||||
# 选择时区
|
||||
timezone = random.choice(self.timezones)
|
||||
|
||||
# 生成其他浏览器指纹信息
|
||||
# 生成用户配置
|
||||
profile = {
|
||||
"user_agent": user_agent,
|
||||
"operating_system": os_choice,
|
||||
"screen_resolution": resolution,
|
||||
"viewport_size": (
|
||||
resolution[0] - random.randint(0, 100),
|
||||
resolution[1] - random.randint(100, 200)
|
||||
),
|
||||
"language": language,
|
||||
"timezone": timezone,
|
||||
"device_type": device_type,
|
||||
"screen_resolution": screen_resolution,
|
||||
"language": random.choice(self.languages),
|
||||
"timezone": region["timezone"],
|
||||
"region": region["region"],
|
||||
"browser": self._extract_browser_from_ua(user_agent),
|
||||
"os": self._extract_os_from_ua(user_agent),
|
||||
"connection_type": self._generate_connection_type(device_type),
|
||||
"hardware_concurrency": self._generate_hardware_concurrency(device_type),
|
||||
"memory": self._generate_memory_info(device_type),
|
||||
"color_depth": random.choice([24, 32]),
|
||||
"platform": self._extract_platform(user_agent),
|
||||
"cookie_enabled": True,
|
||||
"java_enabled": random.choice([True, False]),
|
||||
"hardware_concurrency": random.choice([2, 4, 8, 12, 16]),
|
||||
"device_memory": random.choice([2, 4, 8, 16, 32]),
|
||||
"connection_type": random.choice(["wifi", "ethernet", "cellular"]),
|
||||
"pixel_ratio": self._generate_pixel_ratio(device_type)
|
||||
}
|
||||
|
||||
return profile
|
||||
|
||||
def get_realistic_headers(self, profile=None, referrer=None):
    """Build a browser-like set of HTTP request headers.

    Args:
        profile: User profile dict. It must provide "user_agent" and
            "language", plus "platform" when the user agent contains
            "Chrome". A falsy value makes the method draw a profile
            from get_random_user_profile().
        referrer: Optional referring URL. When truthy it is sent as the
            Referer header and the request is marked as cross-site.

    Returns:
        dict mapping header names to string values.
    """
    profile = profile or self.get_random_user_profile()
    user_agent = profile["user_agent"]

    # A navigation without a referrer looks like a typed/bookmarked URL.
    fetch_site = "cross-site" if referrer else "none"

    headers = {
        "User-Agent": user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": profile["language"],
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": fetch_site,
        "Sec-Fetch-User": "?1",
        "Cache-Control": "max-age=0",
        "DNT": str(random.randint(0, 1)),  # Do Not Track, chosen at random
    }

    if referrer:
        headers["Referer"] = referrer

    # Client-hint headers are only added for user agents mentioning Chrome.
    if "Chrome" in user_agent:
        headers["sec-ch-ua"] = self._generate_chrome_sec_ch_ua(user_agent)
        headers["sec-ch-ua-mobile"] = "?1" if "Mobile" in user_agent else "?0"
        headers["sec-ch-ua-platform"] = f'"{profile["platform"]}"'

    return headers
|
||||
|
||||
def get_visit_behavior(self):
|
||||
"""获取基于时间的访问行为模式"""
|
||||
current_hour = datetime.now().hour
|
||||
"""生成访问行为模式"""
|
||||
pattern = random.choice(self.visit_patterns)
|
||||
|
||||
# 确定当前时间段
|
||||
pattern_type = "工作时间"
|
||||
for pattern_name, pattern_data in self.visit_patterns.items():
|
||||
if current_hour in pattern_data["hours"]:
|
||||
pattern_type = pattern_name
|
||||
break
|
||||
# 添加随机变化
|
||||
behavior = pattern.copy()
|
||||
behavior["actual_stay_time"] = random.randint(
|
||||
pattern["stay_time_range"][0],
|
||||
pattern["stay_time_range"][1]
|
||||
)
|
||||
behavior["actual_pages"] = random.randint(
|
||||
pattern["pages_per_visit"][0],
|
||||
pattern["pages_per_visit"][1]
|
||||
)
|
||||
|
||||
pattern = self.visit_patterns[pattern_type]
|
||||
# 添加时间相关的行为调整
|
||||
current_hour = time.localtime().tm_hour
|
||||
if 9 <= current_hour <= 18: # 工作时间
|
||||
behavior["ad_tolerance"] *= 0.7 # 更不容忍广告
|
||||
behavior["actual_stay_time"] *= 0.8 # 停留时间较短
|
||||
elif 19 <= current_hour <= 23: # 休闲时间
|
||||
behavior["ad_tolerance"] *= 1.2 # 相对容忍广告
|
||||
behavior["actual_stay_time"] *= 1.1 # 停留时间较长
|
||||
|
||||
return behavior
|
||||
|
||||
def get_ad_interaction_preferences(self):
|
||||
"""生成广告交互偏好"""
|
||||
return {
|
||||
"pattern_type": pattern_type,
|
||||
"stay_time_multiplier": pattern["stay_time_multiplier"],
|
||||
"scroll_frequency": pattern["scroll_frequency"],
|
||||
"reading_speed": random.uniform(200, 400), # 每分钟字数
|
||||
"interaction_probability": random.uniform(0.3, 0.8),
|
||||
"close_button_preference": random.uniform(0.7, 0.9), # 关闭按钮偏好
|
||||
"native_ad_tolerance": random.uniform(0.3, 0.6), # 原生广告容忍度
|
||||
"video_ad_skip_rate": random.uniform(0.6, 0.8), # 视频广告跳过率
|
||||
"banner_ignore_rate": random.uniform(0.8, 0.95), # 横幅广告忽略率
|
||||
"popup_close_speed": random.uniform(0.5, 3.0), # 弹窗关闭速度
|
||||
"ad_click_probability": random.uniform(0.05, 0.15) # 广告点击概率
|
||||
}
|
||||
|
||||
def get_realistic_timing(self, base_time, behavior=None):
    """Scale a base duration by the visit behaviour and random jitter.

    Args:
        base_time: Base duration to adjust.
        behavior: Dict providing "stay_time_multiplier". A falsy value
            makes the method fetch one from get_visit_behavior().

    Returns:
        base_time * stay_time_multiplier * uniform(0.7, 1.5), floored
        at 1.0.
    """
    behavior = behavior or self.get_visit_behavior()

    # Time-of-day effect first, then human-like randomness on top.
    scaled = base_time * behavior["stay_time_multiplier"]
    jitter = random.uniform(0.7, 1.5)

    return max(scaled * jitter, 1.0)
|
||||
|
||||
def simulate_human_delays(self, action_type="normal"):
|
||||
"""模拟真实的人类操作延迟"""
|
||||
delays = {
|
||||
"thinking": (2, 8), # 思考时间
|
||||
"reading": (3, 15), # 阅读时间
|
||||
"scrolling": (0.5, 2), # 滚动间隔
|
||||
"clicking": (0.8, 3), # 点击间隔
|
||||
"typing": (0.1, 0.5), # 打字间隔
|
||||
"normal": (1, 4), # 普通操作
|
||||
def get_realistic_timing(self):
    """Draw a fresh set of randomized timing parameters.

    Returns:
        dict mapping each timing name to a float drawn uniformly from
        its range.
    """
    # (name, low, high) - evaluated in this order, one draw per entry.
    timing_ranges = (
        ("page_load_wait", 2, 5),                # wait after page load
        ("ad_recognition_time", 0.5, 2.5),       # time to notice an ad
        ("close_button_search_time", 0.3, 2.0),  # time to find the close button
        ("decision_making_time", 1, 4),          # time to decide
        ("click_execution_time", 0.1, 0.8),      # time to perform the click
        ("post_action_pause", 0.5, 2.0),         # pause after an action
        ("scroll_speed", 0.5, 2.0),              # scrolling speed
        ("read_speed", 200, 400),                # reading speed (chars/minute)
    )
    return {name: random.uniform(low, high) for name, low, high in timing_ranges}
|
||||
|
||||
min_delay, max_delay = delays.get(action_type, delays["normal"])
|
||||
return random.uniform(min_delay, max_delay)
|
||||
|
||||
def _weighted_choice(self, choices):
|
||||
def _weighted_choice(self, choices_dict):
|
||||
"""根据权重选择"""
|
||||
total = sum(choice["weight"] for choice in choices)
|
||||
total = sum(choices_dict.values())
|
||||
r = random.uniform(0, total)
|
||||
upto = 0
|
||||
for choice in choices:
|
||||
if upto + choice["weight"] >= r:
|
||||
return choice["name"]
|
||||
upto += choice["weight"]
|
||||
return choices[-1]["name"]
|
||||
for choice, weight in choices_dict.items():
|
||||
if upto + weight >= r:
|
||||
return choice
|
||||
upto += weight
|
||||
return list(choices_dict.keys())[-1]
|
||||
|
||||
def _extract_platform(self, user_agent):
|
||||
"""从用户代理中提取平台信息"""
|
||||
if "Windows NT 10.0" in user_agent:
|
||||
return "Windows"
|
||||
elif "Windows NT 11.0" in user_agent:
|
||||
return "Windows"
|
||||
elif "Macintosh" in user_agent:
|
||||
return "macOS"
|
||||
elif "X11; Linux" in user_agent:
|
||||
return "Linux"
|
||||
elif "Android" in user_agent:
|
||||
return "Android"
|
||||
elif "iPhone" in user_agent:
|
||||
return "iOS"
|
||||
def _weighted_choice_list(self, choices_list):
|
||||
"""根据权重列表选择"""
|
||||
total_weight = sum(item["weight"] for item in choices_list)
|
||||
r = random.uniform(0, total_weight)
|
||||
upto = 0
|
||||
for item in choices_list:
|
||||
if upto + item["weight"] >= r:
|
||||
return item
|
||||
upto += item["weight"]
|
||||
return choices_list[-1]
|
||||
|
||||
def _extract_browser_from_ua(self, user_agent):
|
||||
"""从用户代理提取浏览器类型"""
|
||||
if "Chrome" in user_agent:
|
||||
return "chrome"
|
||||
elif "Safari" in user_agent and "Chrome" not in user_agent:
|
||||
return "safari"
|
||||
elif "Firefox" in user_agent:
|
||||
return "firefox"
|
||||
elif "Edge" in user_agent:
|
||||
return "edge"
|
||||
else:
|
||||
return "Unknown"
|
||||
return "unknown"
|
||||
|
||||
def _generate_chrome_sec_ch_ua(self, user_agent):
|
||||
"""生成Chrome的sec-ch-ua头部"""
|
||||
# 从用户代理中提取Chrome版本
|
||||
if "Chrome/" in user_agent:
|
||||
version = user_agent.split("Chrome/")[1].split(".")[0]
|
||||
return f'"Not_A Brand";v="8", "Chromium";v="{version}", "Google Chrome";v="{version}"'
|
||||
return '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"'
|
||||
def _extract_os_from_ua(self, user_agent):
|
||||
"""从用户代理提取操作系统"""
|
||||
if "Windows NT" in user_agent:
|
||||
return "windows"
|
||||
elif "Mac OS X" in user_agent:
|
||||
if "iPhone" in user_agent or "iPad" in user_agent:
|
||||
return "ios"
|
||||
else:
|
||||
return "macos"
|
||||
elif "Linux" in user_agent:
|
||||
if "Android" in user_agent:
|
||||
return "android"
|
||||
else:
|
||||
return "linux"
|
||||
else:
|
||||
return "unknown"
|
||||
|
||||
def generate_session_data(self):
|
||||
"""生成完整的会话数据"""
|
||||
profile = self.get_random_user_profile()
|
||||
behavior = self.get_visit_behavior()
|
||||
|
||||
session_data = {
|
||||
"profile": profile,
|
||||
"behavior": behavior,
|
||||
"session_id": f"session_{int(time.time())}_{random.randint(1000, 9999)}",
|
||||
"start_time": datetime.now().isoformat(),
|
||||
"fingerprint": self._generate_browser_fingerprint(profile),
|
||||
def _generate_connection_type(self, device_type):
|
||||
"""生成连接类型"""
|
||||
if device_type == "mobile":
|
||||
return random.choice(["4g", "5g", "wifi", "3g"])
|
||||
else:
|
||||
return random.choice(["wifi", "ethernet", "cable"])
|
||||
|
||||
def _generate_hardware_concurrency(self, device_type):
|
||||
"""生成硬件并发数"""
|
||||
if device_type == "mobile":
|
||||
return random.choice([2, 4, 6, 8])
|
||||
else:
|
||||
return random.choice([4, 6, 8, 12, 16])
|
||||
|
||||
def _generate_memory_info(self, device_type):
|
||||
"""生成内存信息"""
|
||||
if device_type == "mobile":
|
||||
return random.choice([2, 3, 4, 6, 8]) # GB
|
||||
else:
|
||||
return random.choice([4, 8, 16, 32]) # GB
|
||||
|
||||
def _generate_pixel_ratio(self, device_type):
|
||||
"""生成像素比"""
|
||||
if device_type == "mobile":
|
||||
return random.choice([1.0, 1.5, 2.0, 2.5, 3.0])
|
||||
else:
|
||||
return random.choice([1.0, 1.25, 1.5, 2.0])
|
||||
|
||||
def get_session_fingerprint(self):
|
||||
"""生成会话指纹"""
|
||||
return {
|
||||
"canvas_fingerprint": self._generate_canvas_fingerprint(),
|
||||
"webgl_fingerprint": self._generate_webgl_fingerprint(),
|
||||
"audio_fingerprint": self._generate_audio_fingerprint(),
|
||||
"font_fingerprint": self._generate_font_fingerprint(),
|
||||
"timezone_offset": random.randint(-12, 12) * 60,
|
||||
"touch_support": random.choice([True, False]),
|
||||
"webrtc_support": random.choice([True, False]),
|
||||
"battery_level": random.randint(20, 100) if random.random() < 0.8 else None
|
||||
}
|
||||
|
||||
return session_data
|
||||
|
||||
def _generate_browser_fingerprint(self, profile):
|
||||
"""生成浏览器指纹"""
|
||||
fingerprint = {
|
||||
"screen": {
|
||||
"width": profile["screen_resolution"][0],
|
||||
"height": profile["screen_resolution"][1],
|
||||
"colorDepth": profile["color_depth"],
|
||||
"pixelDepth": profile["color_depth"],
|
||||
def _generate_canvas_fingerprint(self):
|
||||
"""生成Canvas指纹"""
|
||||
return "canvas_" + str(random.randint(100000, 999999))
|
||||
|
||||
def _generate_webgl_fingerprint(self):
|
||||
"""生成WebGL指纹"""
|
||||
return "webgl_" + str(random.randint(100000, 999999))
|
||||
|
||||
def _generate_audio_fingerprint(self):
|
||||
"""生成音频指纹"""
|
||||
return "audio_" + str(random.randint(100000, 999999))
|
||||
|
||||
def _generate_font_fingerprint(self):
|
||||
"""生成字体指纹"""
|
||||
common_fonts = [
|
||||
"Arial", "Times New Roman", "Helvetica", "Georgia", "Verdana",
|
||||
"Trebuchet MS", "Comic Sans MS", "Impact", "Courier New"
|
||||
]
|
||||
available_fonts = random.sample(common_fonts, random.randint(5, 9))
|
||||
return ",".join(available_fonts)
|
||||
|
||||
def get_behavioral_patterns(self):
|
||||
"""获取行为模式"""
|
||||
patterns = {
|
||||
"mouse_movement": {
|
||||
"speed": random.uniform(0.5, 2.0),
|
||||
"smoothness": random.uniform(0.6, 1.0),
|
||||
"pause_frequency": random.uniform(0.1, 0.4)
|
||||
},
|
||||
"navigator": {
|
||||
"userAgent": profile["user_agent"],
|
||||
"language": profile["language"].split(",")[0],
|
||||
"languages": profile["language"].split(","),
|
||||
"platform": profile["platform"],
|
||||
"cookieEnabled": profile["cookie_enabled"],
|
||||
"javaEnabled": profile["java_enabled"],
|
||||
"hardwareConcurrency": profile["hardware_concurrency"],
|
||||
"deviceMemory": profile["device_memory"],
|
||||
"scroll_behavior": {
|
||||
"speed": random.uniform(0.3, 1.5),
|
||||
"direction_changes": random.randint(2, 8),
|
||||
"pause_at_content": random.choice([True, False])
|
||||
},
|
||||
"timezone": profile["timezone"],
|
||||
"webgl_vendor": random.choice([
|
||||
"Google Inc. (Intel)",
|
||||
"Google Inc. (NVIDIA)",
|
||||
"Google Inc. (AMD)",
|
||||
"Apple Inc.",
|
||||
]),
|
||||
"click_patterns": {
|
||||
"double_click_speed": random.uniform(200, 500), # ms
|
||||
"click_precision": random.uniform(0.8, 1.0),
|
||||
"accidental_clicks": random.uniform(0.01, 0.05)
|
||||
},
|
||||
"keyboard_behavior": {
|
||||
"typing_speed": random.uniform(30, 80), # WPM
|
||||
"error_rate": random.uniform(0.02, 0.08),
|
||||
"pause_between_words": random.uniform(0.1, 0.5)
|
||||
}
|
||||
}
|
||||
return patterns
|
||||
|
||||
def get_attention_patterns(self):
    """Draw a randomized set of attention-related parameters.

    Returns:
        dict mapping each parameter name to a float drawn uniformly
        from its range.
    """
    # (name, low, high) - evaluated in this order, one draw per entry.
    attention_ranges = (
        ("attention_span", 30, 300),             # attention duration (seconds)
        ("distraction_probability", 0.1, 0.3),   # chance of getting distracted
        ("focus_recovery_time", 2, 10),          # time to refocus
        ("multitasking_tendency", 0.0, 0.5),     # tendency to multitask
        ("content_engagement", 0.3, 0.9),        # engagement with content
    )
    return {name: random.uniform(low, high) for name, low, high in attention_ranges}
|
||||
|
||||
def simulate_realistic_delays(self):
    """Draw a randomized set of delay values, all in milliseconds.

    Returns:
        dict mapping each delay name to a float drawn uniformly from
        its range.
    """
    # (name, low_ms, high_ms) - evaluated in this order, one draw per entry.
    delay_ranges_ms = (
        ("network_latency", 50, 200),       # network latency
        ("processing_delay", 100, 500),     # processing delay
        ("render_delay", 50, 150),          # rendering delay
        ("user_reaction_delay", 200, 800),  # user reaction delay
    )
    return {name: random.uniform(low, high) for name, low, high in delay_ranges_ms}
|
||||
|
||||
return fingerprint
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
|
||||
db = RealUserDatabase()
|
||||
|
||||
# 生成用户配置
|
||||
# 生成随机用户配置
|
||||
profile = db.get_random_user_profile()
|
||||
print("用户配置:")
|
||||
print(json.dumps(profile, indent=2, ensure_ascii=False))
|
||||
for key, value in profile.items():
|
||||
print(f" {key}: {value}")
|
||||
|
||||
# 生成HTTP头部
|
||||
headers = db.get_realistic_headers(profile)
|
||||
print("\nHTTP头部:")
|
||||
for key, value in headers.items():
|
||||
print(f"{key}: {value}")
|
||||
|
||||
# 生成访问行为
|
||||
print("\n访问行为:")
|
||||
behavior = db.get_visit_behavior()
|
||||
print(f"\n访问行为: {behavior}")
|
||||
for key, value in behavior.items():
|
||||
print(f" {key}: {value}")
|
||||
|
||||
# 生成完整会话数据
|
||||
session = db.generate_session_data()
|
||||
print("\n会话数据:")
|
||||
print(json.dumps(session, indent=2, ensure_ascii=False))
|
||||
print("\n广告交互偏好:")
|
||||
ad_prefs = db.get_ad_interaction_preferences()
|
||||
for key, value in ad_prefs.items():
|
||||
print(f" {key}: {value:.2f}")
|
||||
|
||||
print("\n真实时间模式:")
|
||||
timing = db.get_realistic_timing()
|
||||
for key, value in timing.items():
|
||||
print(f" {key}: {value:.2f}秒")
|
||||
Reference in New Issue
Block a user