#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
User-Agent diagnostics for a (optionally proxied) HTTP client.

This module is the readable form of a git patch (commit
4db06ac5171fc603f226c1509edf148289ed3518) whose line breaks had been lost.
The patch added two scripts; their public functions are kept here under
their original names:

* ``target_site_test.py`` -- target-site test tool, specifically testing how
  game.586vip.cn handles the User-Agent
  (``test_target_site``, ``test_different_user_agents``).
* ``user_agent_diagnostic.py`` -- User-Agent diagnostic tool, checking whether
  the proxy modifies the User-Agent header
  (``test_user_agent_with_proxy``, ``test_multiple_user_agents``).

Both scripts read ``config.json`` from the current working directory and use
its optional ``proxy`` object (keys ``host``, ``port``, ``username``,
``password``).  The setup code the scripts duplicated (config loading, proxy
URL construction, header sets, session creation) lives in the private
helpers below.
"""

import json
import random  # imported by both original scripts; nothing in this module calls it
import time
from urllib.parse import quote

import requests

from real_user_database import RealUserDatabase

CONFIG_PATH = 'config.json'
TARGET_SITE_URL = "https://game.586vip.cn/"


def _load_config():
    """Return the parsed ``config.json`` as a dict, or ``None`` if unusable.

    A missing file, malformed JSON, or a JSON root that is not an object is
    reported on stdout and results in ``None`` so callers can simply return.
    """
    try:
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        print("❌ 配置文件config.json不存在")
        return None
    except json.JSONDecodeError as exc:
        print(f"❌ 配置文件config.json格式错误: {exc}")
        return None

    # Callers use config.get(...), which only exists on a JSON object.
    if not isinstance(config, dict):
        print(f"❌ 配置文件config.json格式错误: {type(config).__name__}")
        return None
    return config


def _build_proxies(proxy_config):
    """Build the ``proxies`` mapping for requests, or ``None`` without a proxy.

    Args:
        proxy_config: The ``proxy`` object from the config (must contain
            ``username``, ``password``, ``host`` and ``port``), or a falsy
            value when no proxy is configured.

    Returns:
        ``{'http': url, 'https': url}`` or ``None``.

    Raises:
        KeyError: If a required key is missing from ``proxy_config``.
    """
    if not proxy_config:
        return None

    # Credentials are percent-encoded so that characters such as '@', ':' or
    # '/' cannot be mistaken for URL delimiters.  '%' is left untouched so a
    # value that is already percent-encoded in the config is not encoded twice.
    username = quote(str(proxy_config['username']), safe='%')
    password = quote(str(proxy_config['password']), safe='%')
    proxy_url = f"http://{username}:{password}@{proxy_config['host']}:{proxy_config['port']}"
    return {
        'http': proxy_url,
        'https': proxy_url,
    }


def _report_proxy(proxy_config):
    """Print which proxy (host and port only, never credentials) is in use."""
    if proxy_config:
        print(f"🌐 使用代理: {proxy_config['host']}:{proxy_config['port']}")
    else:
        print("🏠 不使用代理")


def _navigation_headers(user_agent):
    """Return the full browser-like header set shared by both scripts."""
    return {
        "User-Agent": user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        # NOTE(review): "br" is advertised here; requests/urllib3 reportedly
        # decode Brotli only when a brotli package is installed -- confirm
        # the runtime has one, otherwise response.text may be unreadable.
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Cache-Control": "max-age=0",
        "DNT": "1",
    }


def _make_session(proxies, headers):
    """Create a ``requests.Session`` with the given proxies and headers.

    The caller is responsible for closing it (use it as a context manager).
    """
    session = requests.Session()
    if proxies:
        session.proxies = proxies
    session.headers.update(headers)
    return session


def _count_keywords(lowered_text, keywords):
    """Count how many of ``keywords`` occur in the already-lowercased text."""
    return sum(1 for keyword in keywords if keyword.lower() in lowered_text)


def _report_target_response(url, response):
    """Print status, selected headers and content heuristics for one page."""
    print(f"✅ 请求成功 ({response.status_code})")
    print(f"📦 响应大小: {len(response.content)} 字节")
    print(f"🌍 最终URL: {response.url}")

    # Inspect a few response headers.
    for header in ('Server', 'Content-Type', 'Set-Cookie', 'X-Powered-By'):
        if header not in response.headers:
            continue
        value = response.headers[header]
        if header == 'Set-Cookie':
            # Cookies can be long; show only the first 100 characters.
            print(f"🍪 {header}: {value[:100]}...")
        else:
            print(f"📋 {header}: {value}")

    # Report a redirect when the final URL differs from the requested one.
    if response.url != url:
        print(f"🔄 发生重定向: {url} -> {response.url}")

    content = response.text
    if not content:
        print("⚠️ 页面内容为空")
        return

    print(f"📄 页面内容长度: {len(content)} 字符")

    # Lowercase once; both keyword scans below are case-insensitive.
    lowered = content.lower()

    # Heuristic: more than 3 login-related keywords suggests a login page.
    login_indicators = ['login', '登录', 'sign in', '验证', 'auth', 'password', '密码']
    login_count = _count_keywords(lowered, login_indicators)
    if login_count > 3:
        print("⚠️ 可能被重定向到登录页面")
        print(f" 检测到 {login_count} 个登录相关关键词")

    # Heuristic: more than 2 protection-related keywords suggests a
    # protection/challenge page.
    protection_indicators = ['cloudflare', 'ddos', 'protection', 'checking', 'verify', 'bot', 'captcha']
    protection_count = _count_keywords(lowered, protection_indicators)
    if protection_count > 2:
        print("🛡️ 可能遇到防护页面")
        print(f" 检测到 {protection_count} 个防护相关关键词")

    # Show the beginning of the page.
    print("📝 页面开头内容:")
    print(f" {content[:300]}...")


def _report_echo_response(response, expected_user_agent):
    """Print what a header/IP echo service reported about our request.

    Args:
        response: A successful (HTTP 200) response from an echo service.
        expected_user_agent: The User-Agent string that was sent.
    """
    try:
        data = response.json()
    except ValueError:
        # ValueError is the common base of json's, simplejson's and requests'
        # JSON decode errors, so this works regardless of which one is raised.
        print("⚠️ 响应不是JSON格式")
        print(f"响应内容: {response.text[:200]}...")
        return

    # Valid JSON that is not an object carries none of the fields read below.
    if not isinstance(data, dict):
        data = {}
    echoed_headers = data.get('headers')
    if not isinstance(echoed_headers, dict):
        echoed_headers = {}

    # Compare the User-Agent the service received with the one we sent.
    if 'User-Agent' in echoed_headers:
        received_ua = echoed_headers['User-Agent']
        print("📱 服务器收到的User-Agent:")
        print(f" {received_ua}")

        if received_ua == expected_user_agent:
            print("✅ User-Agent正确传递")
        else:
            print("❌ User-Agent被修改了!")
            print(f" 期望: {expected_user_agent}")
            print(f" 实际: {received_ua}")

    # Show the caller IP if the service reports one under a known key.
    if 'ip' in data:
        print(f"🌍 IP地址: {data['ip']}")
    elif 'origin' in data:
        print(f"🌍 IP地址: {data['origin']}")

    for header in ('Accept', 'Accept-Language', 'Accept-Encoding'):
        if header in echoed_headers:
            print(f"📋 {header}: {echoed_headers[header]}")


def test_target_site():
    """Test how the target site responds to a realistic browser profile.

    Loads ``config.json``, picks a random profile from ``RealUserDatabase``,
    requests each target URL (through the configured proxy, if any) with a
    full browser-like header set, and prints status, selected response
    headers, redirect information and keyword-based heuristics for login and
    protection pages.  Waits 3 seconds between requests and finishes with a
    fixed list of possible causes and suggestions.

    Returns ``None``; returns early if the config cannot be loaded.
    """
    print("🎯 目标网站测试工具")
    print("=" * 50)

    config = _load_config()
    if config is None:
        return

    # Generate a real-user profile and take its User-Agent.
    real_user_agent = RealUserDatabase().get_random_user_profile()["user_agent"]

    print("📱 使用的User-Agent:")
    print(f" {real_user_agent}")
    print()

    proxy_config = config.get('proxy')
    proxies = _build_proxies(proxy_config)
    _report_proxy(proxy_config)
    print()

    target_urls = [
        "https://game.586vip.cn/",
        "https://game.586vip.cn/games/2048/index.html",
    ]

    # This script sends the shared header set plus "Pragma: no-cache".
    headers = _navigation_headers(real_user_agent)
    headers["Pragma"] = "no-cache"

    for url in target_urls:
        print(f"🔍 测试目标网站: {url}")

        try:
            # A fresh session per URL, closed as soon as the page is reported.
            with _make_session(proxies, headers) as session:
                response = session.get(url, timeout=15, allow_redirects=True)
                _report_target_response(url, response)
        except requests.exceptions.RequestException as e:
            print(f"❌ 请求失败: {e}")

        print("-" * 70)
        time.sleep(3)  # avoid hammering the site

    print("\n🎯 分析结果:")
    print("如果宝塔显示'WanScannerBot/1.1',可能的原因:")
    print("1. 网站使用了CDN或反向代理,修改了User-Agent")
    print("2. 网站的防护系统检测到某些特征,标记为爬虫")
    print("3. 宝塔的日志记录有问题")
    print("4. 网站服务器端的中间件修改了User-Agent")
    print()
    print("建议解决方案:")
    print("1. 尝试更换不同的User-Agent")
    print("2. 增加更多真实浏览器特征")
    print("3. 模拟更真实的访问行为")
    print("4. 检查网站的防护机制")


def test_different_user_agents():
    """Request the target site with three different random User-Agents.

    For each of three random profiles from ``RealUserDatabase`` this sends one
    GET to the target site with a reduced header set and prints the status
    code and body size.  A response counts as normal when the status is 200
    and the body is larger than 1000 bytes.  Waits 2 seconds between requests.

    Returns ``None``; returns early if the config cannot be loaded.
    """
    print("\n🔬 多User-Agent目标网站测试")
    print("=" * 50)

    config = _load_config()
    if config is None:
        return

    proxies = _build_proxies(config.get('proxy'))
    user_db = RealUserDatabase()

    for i in range(3):
        profile = user_db.get_random_user_profile()
        ua = profile["user_agent"]
        os_info = profile["operating_system"]

        print(f"🎭 测试User-Agent {i+1} ({os_info}):")
        print(f" {ua}")

        headers = {
            "User-Agent": ua,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        try:
            with _make_session(proxies, headers) as session:
                response = session.get(TARGET_SITE_URL, timeout=10)
            print(f" ✅ 状态码: {response.status_code}")
            print(f" 📦 响应大小: {len(response.content)} 字节")

            # Simple sanity check on status and body size.
            if response.status_code == 200 and len(response.content) > 1000:
                print(" 👍 访问正常")
            else:
                print(" ⚠️ 访问可能异常")

        except requests.exceptions.RequestException as e:
            print(f" ❌ 请求失败: {e}")

        print()
        time.sleep(2)


def test_user_agent_with_proxy():
    """Check whether the User-Agent we send arrives unchanged.

    Loads ``config.json``, picks a random profile from ``RealUserDatabase``
    and queries several public echo services (through the configured proxy,
    if any).  For every JSON answer it prints the User-Agent the service
    received (and whether it matches the one sent), the reported IP and a few
    echoed request headers.  Waits 2 seconds between services and finishes
    with a fixed list of possible causes and remedies.

    Returns ``None``; returns early if the config cannot be loaded.
    """
    print("🔍 User-Agent诊断工具")
    print("=" * 50)

    config = _load_config()
    if config is None:
        return

    # Generate a real-user profile and take its User-Agent.
    real_user_agent = RealUserDatabase().get_random_user_profile()["user_agent"]

    print("📱 生成的真实User-Agent:")
    print(f" {real_user_agent}")
    print()

    # Services that are queried one after another.
    test_services = [
        "https://httpbin.org/headers",
        "https://api.ipify.org?format=json",
        "https://ipinfo.io/json",
        "https://ifconfig.me/all.json",
    ]

    proxy_config = config.get('proxy')
    proxies = _build_proxies(proxy_config)
    _report_proxy(proxy_config)
    print()

    headers = _navigation_headers(real_user_agent)

    for i, service in enumerate(test_services):
        print(f"🔍 测试服务 {i+1}: {service}")

        try:
            with _make_session(proxies, headers) as session:
                response = session.get(service, timeout=15)

            if response.status_code == 200:
                print(f"✅ 请求成功 ({response.status_code})")
                _report_echo_response(response, real_user_agent)
            else:
                print(f"❌ 请求失败 ({response.status_code})")

        except requests.exceptions.RequestException as e:
            print(f"❌ 请求异常: {e}")

        print("-" * 50)
        time.sleep(2)  # avoid hammering the services

    print("\n🎯 诊断建议:")
    print("如果User-Agent被修改,可能的原因和解决方案:")
    print("1. 代理服务器修改了User-Agent")
    print(" - 联系代理服务商")
    print(" - 尝试更换代理服务器")
    print("2. 代理服务商的反检测策略")
    print(" - 使用更高级的代理服务")
    print(" - 考虑使用住宅代理")
    print("3. 请求被中间件拦截")
    print(" - 检查网络环境")
    print(" - 尝试直连测试")


def test_multiple_user_agents():
    """Print five random User-Agents (with their OS) from ``RealUserDatabase``.

    Performs no network requests; it only shows what the profile database
    generates.
    """
    print("\n🔬 多User-Agent测试")
    print("=" * 50)

    user_db = RealUserDatabase()

    for i in range(5):
        profile = user_db.get_random_user_profile()
        ua = profile["user_agent"]
        os_info = profile["operating_system"]

        print(f"🎭 User-Agent {i+1} ({os_info}):")
        print(f" {ua}")
        print()


if __name__ == "__main__":
    # Entry points of target_site_test.py, followed by those of
    # user_agent_diagnostic.py, in their original order.
    test_target_site()
    test_different_user_agents()
    test_user_agent_with_proxy()
    test_multiple_user_agents()