This commit is contained in:
huangzhenpc
2025-07-18 11:24:38 +08:00
parent 4db06ac517
commit fdac72a040

View File

@@ -4,7 +4,7 @@
网站流量模拟脚本 (最终版本)
使用真实用户数据库模拟最真实的访问轨迹
支持Google搜索来源和真实网站跳转
2024升级增加多游戏快速选择行为 + 宝塔友好模式
2024升级增加多游戏快速选择行为 + 宝塔友好模式 + 移动端偏好
"""
import requests
@@ -146,8 +146,25 @@ class WebTrafficBotFinal:
print(f"👤 用户身份: {self.current_profile['operating_system']} | {self.current_profile['screen_resolution']} | {self.current_behavior['pattern_type']}")
def _get_baota_friendly_headers(self):
"""生成宝塔友好的完整HTTP头部"""
user_agent = self.current_profile["user_agent"]
"""生成宝塔友好的完整HTTP头部 - 移动端偏好版"""
# 🎯 增加移动端User-Agent偏好 - 因为测试发现移动端更容易通过防护
if random.random() < 0.7: # 70%概率使用移动端
mobile_user_agents = [
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1",
"Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
"Mozilla/5.0 (iPhone; CPU iPhone OS 15_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6 Mobile/15E148 Safari/604.1",
"Mozilla/5.0 (Linux; Android 14; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",
"Mozilla/5.0 (Linux; Android 13; SM-A515F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Mobile Safari/537.36",
"Mozilla/5.0 (Linux; Android 12; SM-G998B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36",
"Mozilla/5.0 (iPad; CPU OS 17_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1",
"Mozilla/5.0 (iPad; CPU OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
]
user_agent = random.choice(mobile_user_agents)
is_mobile = True
else:
user_agent = self.current_profile["user_agent"]
is_mobile = "Mobile" in user_agent or "iPhone" in user_agent or "Android" in user_agent
# 🎯 宝塔友好的完整头部配置
headers = {
@@ -166,11 +183,24 @@ class WebTrafficBotFinal:
"Pragma": "no-cache",
# 🎯 关键:宝塔可能检测的额外头部
"X-Requested-With": "XMLHttpRequest" if random.random() < 0.1 else None, # 偶尔模拟AJAX请求
"Origin": None, # 首次访问通常没有Origin
"Purpose": "prefetch" if random.random() < 0.05 else None, # 偶尔的预取请求
"X-Requested-With": "XMLHttpRequest" if random.random() < 0.1 else None,
"Origin": None,
"Purpose": "prefetch" if random.random() < 0.05 else None,
# 🎯 移动端特有头部
"X-Forwarded-For": None, # 避免代理检测
"X-Real-IP": None, # 避免代理检测
}
# 移动端特殊头部
if is_mobile:
headers.update({
"Sec-CH-UA-Mobile": "?1",
"Sec-CH-UA-Platform": '"Android"' if "Android" in user_agent else '"iOS"',
"Viewport-Width": str(random.choice([375, 414, 390, 393, 412])),
"Device-Memory": str(random.choice([4, 6, 8])),
})
# 移除None值
headers = {k: v for k, v in headers.items() if v is not None}
@@ -178,11 +208,13 @@ class WebTrafficBotFinal:
if "Chrome" in user_agent:
headers.update({
"sec-ch-ua": self._generate_chrome_sec_ch_ua(user_agent),
"sec-ch-ua-mobile": "?0" if "Mobile" not in user_agent else "?1",
"sec-ch-ua-platform": f'"{self.current_profile["platform"]}"',
"sec-ch-ua-platform-version": self._get_platform_version(),
"sec-ch-ua-mobile": "?1" if is_mobile else "?0",
"sec-ch-ua-platform": f'"{self._get_platform_from_ua(user_agent)}"',
})
logger.info(f"📱 使用User-Agent: {user_agent}")
logger.info(f"📱 移动端模式: {is_mobile}")
return headers
def _generate_chrome_sec_ch_ua(self, user_agent):
@@ -203,6 +235,21 @@ class WebTrafficBotFinal:
return '""'
return '""'
def _get_platform_from_ua(self, user_agent):
"""从User-Agent中提取平台信息"""
if "iPhone" in user_agent or "iPad" in user_agent:
return "iOS"
elif "Android" in user_agent:
return "Android"
elif "Windows" in user_agent:
return "Windows"
elif "Macintosh" in user_agent:
return "macOS"
elif "Linux" in user_agent:
return "Linux"
else:
return "Unknown"
def get_current_ip(self):
"""获取当前IP地址"""
ip_services = [
@@ -497,15 +544,16 @@ class WebTrafficBotFinal:
return success_count > 0
def _simulate_quick_game_browsing(self, response, game_path, current_index, total_count):
"""模拟快速游戏浏览行为 - 宝塔友好"""
# 真实用户快速浏览行为2-5秒稍微增加以通过宝塔检测
browse_time = random.uniform(2.0, 5.0)
"""模拟快速游戏浏览行为 - 防护绕过"""
# 🎯 增加游戏页面停留时间3-8秒
browse_time = random.uniform(3.0, 8.0)
game_name = self._extract_game_name(game_path)
logger.info(f" 👀 快速查看 {game_name}: {browse_time:.1f}")
# 🎯 模拟Javascript交互 - 宝塔重要检测点
self._simulate_javascript_behavior(response.url)
# 🎯 减少Javascript交互频率
if random.random() < 0.2: # 降低到20%概率
self._simulate_javascript_behavior(response.url)
# 模拟快速扫视页面
quick_actions = [
@@ -520,11 +568,11 @@ class WebTrafficBotFinal:
for i in range(action_count):
if i > 0:
time.sleep(random.uniform(0.3, 0.8)) # 稍微增加间隔
time.sleep(random.uniform(0.5, 1.2)) # 增加间隔
action = random.choice(quick_actions)
logger.info(f"{action}")
time.sleep(action_time * random.uniform(0.6, 1.4))
time.sleep(action_time * random.uniform(0.8, 1.6))
# 检查是否需要登录
if self._check_login_required(response):
@@ -601,14 +649,28 @@ class WebTrafficBotFinal:
return False
def make_realistic_request(self, url, timeout=15):
"""发起宝塔友好的真实HTTP请求"""
def make_realistic_request(self, url, timeout=20):
"""发起宝塔友好的真实HTTP请求 - 防护绕过增强版"""
try:
# 添加随机的人为延迟
pre_request_delay = self.user_db.simulate_human_delays("thinking")
# 🎯 增加更长的延迟以避免被识别为爬虫
pre_request_delay = random.uniform(3, 8)
logger.info(f"🕐 人工延迟: {pre_request_delay:.1f}")
time.sleep(pre_request_delay)
response = self.session.get(url, timeout=timeout, allow_redirects=True)
# 🎯 分多次尝试请求,模拟网络重试
max_retries = 3
for attempt in range(max_retries):
try:
response = self.session.get(url, timeout=timeout, allow_redirects=True)
break
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
if attempt < max_retries - 1:
retry_delay = random.uniform(5, 12)
logger.warning(f"⚠️ 连接失败,{retry_delay:.1f}秒后重试 (尝试 {attempt + 1}/{max_retries})")
time.sleep(retry_delay)
continue
else:
raise e
# 记录详细信息
logger.info(f"📡 HTTP请求详情:")
@@ -617,7 +679,7 @@ class WebTrafficBotFinal:
logger.info(f" 📦 响应大小: {len(response.content)} 字节")
logger.info(f" ⏱️ 响应时间: {response.elapsed.total_seconds():.2f}")
# 🍪 记录Cookie信息 - 宝塔关注的指标
# 🍪 记录Cookie信息
if response.cookies:
logger.info(f" 🍪 接收到Cookies: {len(response.cookies)}")
for cookie in response.cookies:
@@ -629,10 +691,19 @@ class WebTrafficBotFinal:
if response.headers.get('server'):
logger.info(f" 🖥️ 服务器: {response.headers.get('server')}")
# 🎯 检查是否被重定向或拦截
if response.url != url:
logger.warning(f"🔄 发生重定向: {url} -> {response.url}")
# 🎯 检查响应内容是否正常
if len(response.content) < 1000:
logger.warning(f"⚠️ 响应内容过小,可能被拦截")
response.raise_for_status()
# 🎯 模拟浏览器的自动行为 - 请求静态资源
self._simulate_browser_auto_requests(url, response)
# 🎯 模拟浏览器的自动行为,但减少频率避免被检测
if random.random() < 0.3: # 降低到30%概率
self._simulate_browser_auto_requests(url, response)
return response
@@ -641,54 +712,47 @@ class WebTrafficBotFinal:
return None
def _simulate_browser_auto_requests(self, base_url, response):
"""模拟浏览器自动请求静态资源"""
if random.random() < 0.7: # 70%概率模拟资源请求
"""模拟浏览器自动请求静态资源 - 防护绕过版"""
if random.random() < 0.5: # 50%概率模拟资源请求
parsed_url = urlparse(base_url)
base_domain = f"{parsed_url.scheme}://{parsed_url.netloc}"
# 🎯 模拟常见的静态资源请求
potential_resources = [
# 🎯 减少资源请求,只请求关键资源
essential_resources = [
"/favicon.ico",
"/css/style.css",
"/js/main.js",
"/js/jquery.min.js",
"/images/logo.png",
"/static/css/bootstrap.min.css",
"/assets/js/app.js",
"/manifest.json",
]
# 随机选择1-3个资源请求
resources_to_request = random.sample(potential_resources, random.randint(1, 3))
# 随机选择1个资源请求
resource = random.choice(essential_resources)
for resource in resources_to_request:
try:
# 🎯 设置静态资源请求的头部
resource_headers = self.session.headers.copy()
resource_headers.update({
"Referer": base_url,
"Sec-Fetch-Dest": self._get_resource_dest(resource),
"Sec-Fetch-Mode": "no-cors",
"Sec-Fetch-Site": "same-origin",
})
resource_url = base_domain + resource
# 短延迟模拟并发加载
time.sleep(random.uniform(0.1, 0.5))
resource_response = self.session.get(
resource_url,
headers=resource_headers,
timeout=5,
allow_redirects=True
)
if resource_response.status_code == 200:
logger.info(f" 📄 成功请求资源: {resource}")
except Exception as e:
logger.debug(f" ⚠️ 资源请求失败 {resource}: {e}")
try:
# 🎯 更长的延迟
time.sleep(random.uniform(2, 5))
resource_headers = self.session.headers.copy()
resource_headers.update({
"Referer": base_url,
"Sec-Fetch-Dest": self._get_resource_dest(resource),
"Sec-Fetch-Mode": "no-cors",
"Sec-Fetch-Site": "same-origin",
})
resource_url = base_domain + resource
resource_response = self.session.get(
resource_url,
headers=resource_headers,
timeout=10,
allow_redirects=True
)
if resource_response.status_code == 200:
logger.info(f" 📄 成功请求资源: {resource}")
except Exception as e:
logger.debug(f" ⚠️ 资源请求失败 {resource}: {e}")
def _get_resource_dest(self, resource_path):
"""根据资源路径确定Sec-Fetch-Dest"""
@@ -704,23 +768,19 @@ class WebTrafficBotFinal:
return "empty"
def _simulate_javascript_behavior(self, url):
"""模拟Javascript行为和AJAX请求"""
if random.random() < 0.4: # 40%概率模拟JS行为
"""模拟Javascript行为 - 防护绕过版"""
if random.random() < 0.2: # 降低到20%概率
parsed_url = urlparse(url)
base_domain = f"{parsed_url.scheme}://{parsed_url.netloc}"
# 🎯 模拟常见的AJAX端点
ajax_endpoints = [
"/api/stats",
"/api/user",
"/analytics",
"/track",
# 🎯 模拟最基本的AJAX请求
basic_endpoints = [
"/api/ping",
"/heartbeat",
"/ping",
"/api/config"
"/api/status"
]
endpoint = random.choice(ajax_endpoints)
endpoint = random.choice(basic_endpoints)
ajax_url = base_domain + endpoint
try:
@@ -734,13 +794,13 @@ class WebTrafficBotFinal:
"Sec-Fetch-Site": "same-origin",
})
# 模拟AJAX延迟
time.sleep(random.uniform(2, 8))
# 🎯 更长的AJAX延迟
time.sleep(random.uniform(5, 15))
ajax_response = self.session.get(
ajax_url,
headers=ajax_headers,
timeout=5
timeout=8
)
if ajax_response.status_code == 200:
@@ -750,7 +810,7 @@ class WebTrafficBotFinal:
logger.debug(f" ⚠️ AJAX请求失败 {endpoint}: {e}")
def _simulate_realistic_browsing(self, response, is_main_page=False):
"""模拟真实的页面浏览行为 - 宝塔友好增强版"""
"""模拟真实的页面浏览行为 - 防护绕过增强版"""
content = response.text
# 估算页面内容长度和阅读时间
@@ -761,19 +821,20 @@ class WebTrafficBotFinal:
logger.info(f" 📝 内容长度: {text_length} 字符")
logger.info(f" ⏱️ 预估阅读时间: {reading_time:.1f}")
# 获取真实的停留时间 - 🎯 缩短主页停留时间,模拟快速寻找游戏
# 🎯 增加停留时间以避免被识别为爬虫
if is_main_page:
# 主页停留时间:3-10秒(稍微增加以通过宝塔检测)
base_time = random.uniform(3, 10)
# 主页停留时间:5-15秒(增加以通过防护检测)
base_time = random.uniform(5, 15)
else:
base_time = min(reading_time, 60) # 最多60秒阅读时间
base_time = min(reading_time, 60)
stay_time = self.user_db.get_realistic_timing(base_time, self.current_behavior)
logger.info(f" 🕐 实际停留时间: {stay_time:.1f}")
# 🎯 模拟Javascript行为 - 宝塔可能检测的行为
self._simulate_javascript_behavior(response.url)
# 🎯 减少Javascript行为频率
if random.random() < 0.3: # 降低到30%概率
self._simulate_javascript_behavior(response.url)
# 模拟分段浏览
self._simulate_browsing_segments(stay_time, content)