diff --git a/website_traffic_bot_final.py b/website_traffic_bot_final.py index 269d5c8..9d7c606 100644 --- a/website_traffic_bot_final.py +++ b/website_traffic_bot_final.py @@ -4,7 +4,7 @@ 网站流量模拟脚本 (最终版本) 使用真实用户数据库模拟最真实的访问轨迹 支持Google搜索来源和真实网站跳转 -2024升级:增加多游戏快速选择行为 + 宝塔友好模式 +2024升级:增加多游戏快速选择行为 + 宝塔友好模式 + 移动端偏好 """ import requests @@ -146,8 +146,25 @@ class WebTrafficBotFinal: print(f"👤 用户身份: {self.current_profile['operating_system']} | {self.current_profile['screen_resolution']} | {self.current_behavior['pattern_type']}") def _get_baota_friendly_headers(self): - """生成宝塔友好的完整HTTP头部""" - user_agent = self.current_profile["user_agent"] + """生成宝塔友好的完整HTTP头部 - 移动端偏好版""" + + # 🎯 增加移动端User-Agent偏好 - 因为测试发现移动端更容易通过防护 + if random.random() < 0.7: # 70%概率使用移动端 + mobile_user_agents = [ + "Mozilla/5.0 (iPhone; CPU iPhone OS 17_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPhone; CPU iPhone OS 15_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (Linux; Android 14; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36", + "Mozilla/5.0 (Linux; Android 13; SM-A515F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Mobile Safari/537.36", + "Mozilla/5.0 (Linux; Android 12; SM-G998B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36", + "Mozilla/5.0 (iPad; CPU OS 17_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPad; CPU OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1", + ] + user_agent = random.choice(mobile_user_agents) + is_mobile = True + else: + user_agent = self.current_profile["user_agent"] + is_mobile = "Mobile" in user_agent or "iPhone" in user_agent or "Android" in user_agent # 🎯 宝塔友好的完整头部配置 headers = { @@ -166,11 +183,24 @@ class WebTrafficBotFinal: "Pragma": "no-cache", # 🎯 关键:宝塔可能检测的额外头部 - "X-Requested-With": "XMLHttpRequest" if random.random() < 0.1 else None, # 偶尔模拟AJAX请求 - "Origin": None, # 首次访问通常没有Origin - "Purpose": "prefetch" if random.random() < 0.05 else None, # 偶尔的预取请求 + "X-Requested-With": "XMLHttpRequest" if random.random() < 0.1 else None, + "Origin": None, + "Purpose": "prefetch" if random.random() < 0.05 else None, + + # 🎯 移动端特有头部 + "X-Forwarded-For": None, # 避免代理检测 + "X-Real-IP": None, # 避免代理检测 } + # 移动端特殊头部 + if is_mobile: + headers.update({ + "Sec-CH-UA-Mobile": "?1", + "Sec-CH-UA-Platform": '"Android"' if "Android" in user_agent else '"iOS"', + "Viewport-Width": str(random.choice([375, 414, 390, 393, 412])), + "Device-Memory": str(random.choice([4, 6, 8])), + }) + # 移除None值 headers = {k: v for k, v in headers.items() if v is not None} @@ -178,11 +208,13 @@ class WebTrafficBotFinal: if "Chrome" in user_agent: headers.update({ "sec-ch-ua": self._generate_chrome_sec_ch_ua(user_agent), - "sec-ch-ua-mobile": "?0" if "Mobile" not in user_agent else "?1", - "sec-ch-ua-platform": f'"{self.current_profile["platform"]}"', - "sec-ch-ua-platform-version": self._get_platform_version(), + "sec-ch-ua-mobile": "?1" if is_mobile else "?0", + "sec-ch-ua-platform": f'"{self._get_platform_from_ua(user_agent)}"', }) + logger.info(f"📱 使用User-Agent: {user_agent}") + logger.info(f"📱 移动端模式: {is_mobile}") + return headers def _generate_chrome_sec_ch_ua(self, user_agent): @@ -203,6 +235,21 @@ class WebTrafficBotFinal: return '""' return '""' + def _get_platform_from_ua(self, user_agent): + """从User-Agent中提取平台信息""" + if "iPhone" in user_agent or "iPad" in user_agent: + return "iOS" + elif "Android" in user_agent: + return "Android" + elif "Windows" in user_agent: + return "Windows" + elif "Macintosh" in user_agent: + return "macOS" + elif "Linux" in user_agent: + return "Linux" + else: + return "Unknown" + def get_current_ip(self): """获取当前IP地址""" ip_services = [ @@ -497,15 +544,16 @@ class WebTrafficBotFinal: return success_count > 0 def _simulate_quick_game_browsing(self, response, game_path, current_index, total_count): - """模拟快速游戏浏览行为 - 宝塔友好版""" - # 真实用户快速浏览行为:2-5秒(稍微增加以通过宝塔检测) - browse_time = random.uniform(2.0, 5.0) + """模拟快速游戏浏览行为 - 防护绕过版""" + # 🎯 增加游戏页面停留时间:3-8秒 + browse_time = random.uniform(3.0, 8.0) game_name = self._extract_game_name(game_path) logger.info(f" 👀 快速查看 {game_name}: {browse_time:.1f}秒") - # 🎯 模拟Javascript交互 - 宝塔重要检测点 - self._simulate_javascript_behavior(response.url) + # 🎯 减少Javascript交互频率 + if random.random() < 0.2: # 降低到20%概率 + self._simulate_javascript_behavior(response.url) # 模拟快速扫视页面 quick_actions = [ @@ -520,11 +568,11 @@ class WebTrafficBotFinal: for i in range(action_count): if i > 0: - time.sleep(random.uniform(0.3, 0.8)) # 稍微增加间隔 + time.sleep(random.uniform(0.5, 1.2)) # 增加间隔 action = random.choice(quick_actions) logger.info(f" ⚡ {action}") - time.sleep(action_time * random.uniform(0.6, 1.4)) + time.sleep(action_time * random.uniform(0.8, 1.6)) # 检查是否需要登录 if self._check_login_required(response): @@ -601,14 +649,28 @@ class WebTrafficBotFinal: return False - def make_realistic_request(self, url, timeout=15): - """发起宝塔友好的真实HTTP请求""" + def make_realistic_request(self, url, timeout=20): + """发起宝塔友好的真实HTTP请求 - 防护绕过增强版""" try: - # 添加随机的人为延迟 - pre_request_delay = self.user_db.simulate_human_delays("thinking") + # 🎯 增加更长的延迟以避免被识别为爬虫 + pre_request_delay = random.uniform(3, 8) + logger.info(f"🕐 人工延迟: {pre_request_delay:.1f}秒") time.sleep(pre_request_delay) - response = self.session.get(url, timeout=timeout, allow_redirects=True) + # 🎯 分多次尝试请求,模拟网络重试 + max_retries = 3 + for attempt in range(max_retries): + try: + response = self.session.get(url, timeout=timeout, allow_redirects=True) + break + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e: + if attempt < max_retries - 1: + retry_delay = random.uniform(5, 12) + logger.warning(f"⚠️ 连接失败,{retry_delay:.1f}秒后重试 (尝试 {attempt + 1}/{max_retries})") + time.sleep(retry_delay) + continue + else: + raise e # 记录详细信息 logger.info(f"📡 HTTP请求详情:") @@ -617,7 +679,7 @@ class WebTrafficBotFinal: logger.info(f" 📦 响应大小: {len(response.content)} 字节") logger.info(f" ⏱️ 响应时间: {response.elapsed.total_seconds():.2f}秒") - # 🍪 记录Cookie信息 - 宝塔关注的指标 + # 🍪 记录Cookie信息 if response.cookies: logger.info(f" 🍪 接收到Cookies: {len(response.cookies)} 个") for cookie in response.cookies: @@ -629,10 +691,19 @@ class WebTrafficBotFinal: if response.headers.get('server'): logger.info(f" 🖥️ 服务器: {response.headers.get('server')}") + # 🎯 检查是否被重定向或拦截 + if response.url != url: + logger.warning(f"🔄 发生重定向: {url} -> {response.url}") + + # 🎯 检查响应内容是否正常 + if len(response.content) < 1000: + logger.warning(f"⚠️ 响应内容过小,可能被拦截") + response.raise_for_status() - # 🎯 模拟浏览器的自动行为 - 请求静态资源 - self._simulate_browser_auto_requests(url, response) + # 🎯 模拟浏览器的自动行为,但减少频率避免被检测 + if random.random() < 0.3: # 降低到30%概率 + self._simulate_browser_auto_requests(url, response) return response @@ -641,54 +712,47 @@ class WebTrafficBotFinal: return None def _simulate_browser_auto_requests(self, base_url, response): - """模拟浏览器自动请求静态资源""" - if random.random() < 0.7: # 70%概率模拟资源请求 + """模拟浏览器自动请求静态资源 - 防护绕过版""" + if random.random() < 0.5: # 50%概率模拟资源请求 parsed_url = urlparse(base_url) base_domain = f"{parsed_url.scheme}://{parsed_url.netloc}" - # 🎯 模拟常见的静态资源请求 - potential_resources = [ + # 🎯 减少资源请求,只请求关键资源 + essential_resources = [ "/favicon.ico", "/css/style.css", "/js/main.js", - "/js/jquery.min.js", - "/images/logo.png", - "/static/css/bootstrap.min.css", - "/assets/js/app.js", - "/manifest.json", ] - # 随机选择1-3个资源请求 - resources_to_request = random.sample(potential_resources, random.randint(1, 3)) + # 随机选择1个资源请求 + resource = random.choice(essential_resources) - for resource in resources_to_request: - try: - # 🎯 设置静态资源请求的头部 - resource_headers = self.session.headers.copy() - resource_headers.update({ - "Referer": base_url, - "Sec-Fetch-Dest": self._get_resource_dest(resource), - "Sec-Fetch-Mode": "no-cors", - "Sec-Fetch-Site": "same-origin", - }) - - resource_url = base_domain + resource - - # 短延迟模拟并发加载 - time.sleep(random.uniform(0.1, 0.5)) - - resource_response = self.session.get( - resource_url, - headers=resource_headers, - timeout=5, - allow_redirects=True - ) - - if resource_response.status_code == 200: - logger.info(f" 📄 成功请求资源: {resource}") - - except Exception as e: - logger.debug(f" ⚠️ 资源请求失败 {resource}: {e}") + try: + # 🎯 更长的延迟 + time.sleep(random.uniform(2, 5)) + + resource_headers = self.session.headers.copy() + resource_headers.update({ + "Referer": base_url, + "Sec-Fetch-Dest": self._get_resource_dest(resource), + "Sec-Fetch-Mode": "no-cors", + "Sec-Fetch-Site": "same-origin", + }) + + resource_url = base_domain + resource + + resource_response = self.session.get( + resource_url, + headers=resource_headers, + timeout=10, + allow_redirects=True + ) + + if resource_response.status_code == 200: + logger.info(f" 📄 成功请求资源: {resource}") + + except Exception as e: + logger.debug(f" ⚠️ 资源请求失败 {resource}: {e}") def _get_resource_dest(self, resource_path): """根据资源路径确定Sec-Fetch-Dest""" @@ -704,23 +768,19 @@ class WebTrafficBotFinal: return "empty" def _simulate_javascript_behavior(self, url): - """模拟Javascript行为和AJAX请求""" - if random.random() < 0.4: # 40%概率模拟JS行为 + """模拟Javascript行为 - 防护绕过版""" + if random.random() < 0.2: # 降低到20%概率 parsed_url = urlparse(url) base_domain = f"{parsed_url.scheme}://{parsed_url.netloc}" - # 🎯 模拟常见的AJAX端点 - ajax_endpoints = [ - "/api/stats", - "/api/user", - "/analytics", - "/track", + # 🎯 只模拟最基本的AJAX请求 + basic_endpoints = [ + "/api/ping", "/heartbeat", - "/ping", - "/api/config" + "/api/status" ] - endpoint = random.choice(ajax_endpoints) + endpoint = random.choice(basic_endpoints) ajax_url = base_domain + endpoint try: @@ -734,13 +794,13 @@ class WebTrafficBotFinal: "Sec-Fetch-Site": "same-origin", }) - # 模拟AJAX延迟 - time.sleep(random.uniform(2, 8)) + # 🎯 更长的AJAX延迟 + time.sleep(random.uniform(5, 15)) ajax_response = self.session.get( ajax_url, headers=ajax_headers, - timeout=5 + timeout=8 ) if ajax_response.status_code == 200: @@ -750,7 +810,7 @@ class WebTrafficBotFinal: logger.debug(f" ⚠️ AJAX请求失败 {endpoint}: {e}") def _simulate_realistic_browsing(self, response, is_main_page=False): - """模拟真实的页面浏览行为 - 宝塔友好增强版""" + """模拟真实的页面浏览行为 - 防护绕过增强版""" content = response.text # 估算页面内容长度和阅读时间 @@ -761,19 +821,20 @@ class WebTrafficBotFinal: logger.info(f" 📝 内容长度: {text_length} 字符") logger.info(f" ⏱️ 预估阅读时间: {reading_time:.1f} 秒") - # 获取真实的停留时间 - 🎯 缩短主页停留时间,模拟快速寻找游戏 + # 🎯 增加停留时间以避免被识别为爬虫 if is_main_page: - # 主页停留时间:3-10秒(稍微增加以通过宝塔检测) - base_time = random.uniform(3, 10) + # 主页停留时间:5-15秒(增加以通过防护检测) + base_time = random.uniform(5, 15) else: - base_time = min(reading_time, 60) # 最多60秒阅读时间 + base_time = min(reading_time, 60) stay_time = self.user_db.get_realistic_timing(base_time, self.current_behavior) logger.info(f" 🕐 实际停留时间: {stay_time:.1f} 秒") - # 🎯 模拟Javascript行为 - 宝塔可能检测的行为 - self._simulate_javascript_behavior(response.url) + # 🎯 减少Javascript行为频率 + if random.random() < 0.3: # 降低到30%概率 + self._simulate_javascript_behavior(response.url) # 模拟分段浏览 self._simulate_browsing_segments(stay_time, content)