This commit is contained in:
huangzhenpc
2025-07-18 11:23:13 +08:00
parent 2246295730
commit 4db06ac517
2 changed files with 382 additions and 0 deletions

218
target_site_test.py Normal file
View File

@@ -0,0 +1,218 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
目标网站测试工具
专门测试game.586vip.cn的User-Agent处理
"""
import requests
import json
import time
import random
from real_user_database import RealUserDatabase
def _count_keyword_hits(lowered_text, keywords):
    """Return how many of *keywords* occur in the already lower-cased text."""
    return sum(1 for keyword in keywords if keyword.lower() in lowered_text)


def _report_target_response(url, response):
    """Print status, selected headers, redirect info and content heuristics.

    Args:
        url: The URL that was originally requested.
        response: The ``requests`` response object returned for that URL.
    """
    if response.ok:
        print(f"✅ 请求成功 ({response.status_code})")
    else:
        # 4xx/5xx answers are still analysed below, but must not be
        # announced as a success.
        print(f"⚠️ 服务器返回错误状态码 ({response.status_code})")
    print(f"📦 响应大小: {len(response.content)} 字节")
    print(f"🌍 最终URL: {response.url}")

    # Show a handful of response headers when present.
    for header in ('Server', 'Content-Type', 'Set-Cookie', 'X-Powered-By'):
        value = response.headers.get(header)
        if value is None:
            continue
        if header == 'Set-Cookie':
            # Only mark the value as truncated when it really was.
            suffix = "..." if len(value) > 100 else ""
            print(f"🍪 {header}: {value[:100]}{suffix}")
        else:
            print(f"📋 {header}: {value}")

    # Report when the final URL differs from the requested one.
    if response.url != url:
        print(f"🔄 发生重定向: {url} -> {response.url}")

    content = response.text
    if not content:
        print("⚠️ 页面内容为空")
        return

    print(f"📄 页面内容长度: {len(content)} 字符")
    # Lower-case the page once instead of once per keyword.
    lowered = content.lower()

    # Heuristic: many login-related keywords suggest a login page.
    login_indicators = ['login', '登录', 'sign in', '验证', 'auth', 'password', '密码']
    login_count = _count_keyword_hits(lowered, login_indicators)
    if login_count > 3:
        print("⚠️ 可能被重定向到登录页面")
        print(f" 检测到 {login_count} 个登录相关关键词")

    # Heuristic: several protection-related keywords suggest an anti-bot page.
    protection_indicators = ['cloudflare', 'ddos', 'protection', 'checking', 'verify', 'bot', 'captcha']
    protection_count = _count_keyword_hits(lowered, protection_indicators)
    if protection_count > 2:
        print("🛡️ 可能遇到防护页面")
        print(f" 检测到 {protection_count} 个防护相关关键词")

    # Show the beginning of the page body.
    print("📝 页面开头内容:")
    preview_suffix = "..." if len(content) > 300 else ""
    print(f" {content[:300]}{preview_suffix}")


def test_target_site():
    """Probe the target site with a browser-like header set and print findings.

    Reads ``config.json`` from the current working directory, takes one random
    profile from ``RealUserDatabase`` and sends a GET request to each target
    URL, through the configured proxy when ``config["proxy"]`` is set. For
    every response it prints the status, size, final URL, a few response
    headers and keyword-based hints about login or protection pages.

    Proxy credentials are read as raw (not percent-encoded) values and are
    percent-encoded here before being placed in the proxy URL.

    Returns:
        None. Everything is printed. The function returns early, after
        printing an error, when ``config.json`` is missing, unreadable as
        UTF-8 JSON, or does not contain a JSON object at the top level.
    """
    # Local import: only needed to escape the proxy credentials below.
    from urllib.parse import quote

    print("🎯 目标网站测试工具")
    print("=" * 50)

    # Load configuration.
    try:
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        print("❌ 配置文件config.json不存在")
        return
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"❌ 配置文件config.json格式错误: {e}")
        return
    if not isinstance(config, dict):
        print("❌ 配置文件config.json格式错误: 顶层必须是JSON对象")
        return

    # Pick a random user profile and use its User-Agent for every request.
    user_db = RealUserDatabase()
    profile = user_db.get_random_user_profile()
    real_user_agent = profile["user_agent"]
    print("📱 使用的User-Agent:")
    print(f" {real_user_agent}")
    print()

    # Proxy configuration.
    proxy_config = config.get('proxy')
    if proxy_config:
        # Percent-encode the credentials so characters such as '@', ':' or
        # '/' cannot corrupt the proxy URL.
        username = quote(str(proxy_config['username']), safe='')
        password = quote(str(proxy_config['password']), safe='')
        proxy_url = f"http://{username}:{password}@{proxy_config['host']}:{proxy_config['port']}"
        proxies = {
            'http': proxy_url,
            'https': proxy_url,
        }
        print(f"🌐 使用代理: {proxy_config['host']}:{proxy_config['port']}")
    else:
        proxies = None
        print("🏠 不使用代理")
    print()

    # Target URLs to probe.
    target_urls = [
        "https://game.586vip.cn/",
        "https://game.586vip.cn/games/2048/index.html",
    ]
    last_index = len(target_urls) - 1

    for index, url in enumerate(target_urls):
        print(f"🔍 测试目标网站: {url}")

        # A new session is created per URL; the context manager closes its
        # connections once the probe is finished.
        with requests.Session() as session:
            if proxies:
                session.proxies = proxies

            # Browser-like request headers.
            # NOTE(review): "br" is advertised in Accept-Encoding; Brotli
            # bodies are presumably only decoded when a brotli package is
            # installed — confirm, otherwise the keyword checks may run on
            # undecoded data.
            session.headers.update({
                "User-Agent": real_user_agent,
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
                "Accept-Encoding": "gzip, deflate, br",
                "Connection": "keep-alive",
                "Upgrade-Insecure-Requests": "1",
                "Sec-Fetch-Dest": "document",
                "Sec-Fetch-Mode": "navigate",
                "Sec-Fetch-Site": "none",
                "Sec-Fetch-User": "?1",
                "Cache-Control": "max-age=0",
                "DNT": "1",
                "Pragma": "no-cache",
            })

            try:
                response = session.get(url, timeout=15, allow_redirects=True)
            except requests.exceptions.RequestException as e:
                print(f"❌ 请求失败: {e}")
            else:
                _report_target_response(url, response)

        print("-" * 70)
        # Pause between probes to avoid hammering the site; nothing follows
        # the last URL, so no pause is needed there.
        if index < last_index:
            time.sleep(3)

    print("\n🎯 分析结果:")
    print("如果宝塔显示'WanScannerBot/1.1',可能的原因:")
    print("1. 网站使用了CDN或反向代理修改了User-Agent")
    print("2. 网站的防护系统检测到某些特征,标记为爬虫")
    print("3. 宝塔的日志记录有问题")
    print("4. 网站服务器端的中间件修改了User-Agent")
    print()
    print("建议解决方案:")
    print("1. 尝试更换不同的User-Agent")
    print("2. 增加更多真实浏览器特征")
    print("3. 模拟更真实的访问行为")
    print("4. 检查网站的防护机制")
def test_different_user_agents():
    """Request the target site with three random User-Agents and print results.

    Reads ``config.json`` from the current working directory, then for each of
    three profiles taken from ``RealUserDatabase`` sends one GET request to
    the target URL (through the configured proxy when ``config["proxy"]`` is
    set) and prints the status code, the body size and a simple verdict: a
    200 response with more than 1000 bytes counts as normal.

    Proxy credentials are read as raw (not percent-encoded) values and are
    percent-encoded here before being placed in the proxy URL.

    Returns:
        None. Everything is printed. The function returns early, after
        printing an error, when ``config.json`` is missing, unreadable as
        UTF-8 JSON, or does not contain a JSON object at the top level.
    """
    # Local import: only needed to escape the proxy credentials below.
    from urllib.parse import quote

    print("\n🔬 多User-Agent目标网站测试")
    print("=" * 50)

    # Load configuration.
    try:
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        print("❌ 配置文件config.json不存在")
        return
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"❌ 配置文件config.json格式错误: {e}")
        return
    if not isinstance(config, dict):
        print("❌ 配置文件config.json格式错误: 顶层必须是JSON对象")
        return

    # Proxy configuration.
    proxy_config = config.get('proxy')
    if proxy_config:
        # Percent-encode the credentials so characters such as '@', ':' or
        # '/' cannot corrupt the proxy URL.
        username = quote(str(proxy_config['username']), safe='')
        password = quote(str(proxy_config['password']), safe='')
        proxy_url = f"http://{username}:{password}@{proxy_config['host']}:{proxy_config['port']}"
        proxies = {
            'http': proxy_url,
            'https': proxy_url,
        }
    else:
        proxies = None

    # Try several User-Agents against the same URL.
    user_db = RealUserDatabase()
    target_url = "https://game.586vip.cn/"
    attempts = 3

    for i in range(attempts):
        profile = user_db.get_random_user_profile()
        ua = profile["user_agent"]
        os_info = profile["operating_system"]
        print(f"🎭 测试User-Agent {i+1} ({os_info}):")
        print(f" {ua}")

        # The context manager closes the session's connections after the
        # request instead of leaving them open.
        with requests.Session() as session:
            if proxies:
                session.proxies = proxies
            session.headers.update({
                "User-Agent": ua,
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
                "Accept-Encoding": "gzip, deflate, br",
                "Connection": "keep-alive",
            })

            try:
                response = session.get(target_url, timeout=10)
            except requests.exceptions.RequestException as e:
                print(f" ❌ 请求失败: {e}")
            else:
                print(f" ✅ 状态码: {response.status_code}")
                print(f" 📦 响应大小: {len(response.content)} 字节")
                # Simple sanity check: HTTP 200 with a non-trivial body.
                if response.status_code == 200 and len(response.content) > 1000:
                    print(" 👍 访问正常")
                else:
                    print(" ⚠️ 访问可能异常")

        print()
        # Pause between attempts only; no request follows the last one.
        if i < attempts - 1:
            time.sleep(2)
if __name__ == "__main__":
    # Script entry point: run the single-profile probe first, then the
    # multi-User-Agent comparison.
    for _scenario in (test_target_site, test_different_user_agents):
        _scenario()

164
user_agent_diagnostic.py Normal file
View File

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
User-Agent诊断工具
检查代理是否修改了User-Agent头部
"""
import requests
import json
import time
import random
from real_user_database import RealUserDatabase
def _report_echoed_request(data, sent_user_agent):
    """Print what an echo service reported about the request it received.

    Args:
        data: Decoded JSON body returned by the echo service.
        sent_user_agent: The User-Agent string that was sent, used to decide
            whether the value seen by the server was altered on the way.
    """
    if not isinstance(data, dict):
        # Subscripting a list or scalar below would raise; report instead.
        print("⚠️ 响应JSON不是对象")
        return

    echoed_headers = data.get('headers')
    if not isinstance(echoed_headers, dict):
        echoed_headers = {}

    # Compare the User-Agent the server saw with the one that was sent.
    received_ua = echoed_headers.get('User-Agent')
    if received_ua is None:
        # NOTE(review): some echo services presumably report the value as a
        # top-level "user_agent" field instead — confirm per service.
        received_ua = data.get('user_agent')
    if received_ua is not None:
        print("📱 服务器收到的User-Agent:")
        print(f" {received_ua}")
        if received_ua == sent_user_agent:
            print("✅ User-Agent正确传递")
        else:
            print("❌ User-Agent被修改了")
            print(f" 期望: {sent_user_agent}")
            print(f" 实际: {received_ua}")

    # Show the caller IP as seen by the service; the first key present wins.
    # NOTE(review): "ip_addr" is an assumed alternative field name — confirm.
    for ip_key in ('ip', 'origin', 'ip_addr'):
        if ip_key in data:
            print(f"🌍 IP地址: {data[ip_key]}")
            break

    # Show a few other request headers as received by the service.
    for header in ('Accept', 'Accept-Language', 'Accept-Encoding'):
        if header in echoed_headers:
            print(f"📋 {header}: {echoed_headers[header]}")


def test_user_agent_with_proxy():
    """Check whether the configured proxy alters the User-Agent header.

    Reads ``config.json`` from the current working directory, takes one random
    profile from ``RealUserDatabase`` and sends a GET request with
    browser-like headers to several public echo/IP services, through the
    configured proxy when ``config["proxy"]`` is set. For JSON responses it
    prints the User-Agent, IP and selected headers reported by the service,
    then prints general troubleshooting advice.

    Proxy credentials are read as raw (not percent-encoded) values and are
    percent-encoded here before being placed in the proxy URL.

    Returns:
        None. Everything is printed. The function returns early, after
        printing an error, when ``config.json`` is missing, unreadable as
        UTF-8 JSON, or does not contain a JSON object at the top level.
    """
    # Local import: only needed to escape the proxy credentials below.
    from urllib.parse import quote

    print("🔍 User-Agent诊断工具")
    print("=" * 50)

    # Load configuration.
    try:
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        print("❌ 配置文件config.json不存在")
        return
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"❌ 配置文件config.json格式错误: {e}")
        return
    if not isinstance(config, dict):
        print("❌ 配置文件config.json格式错误: 顶层必须是JSON对象")
        return

    # Pick a random user profile and use its User-Agent for every request.
    user_db = RealUserDatabase()
    profile = user_db.get_random_user_profile()
    real_user_agent = profile["user_agent"]
    print("📱 生成的真实User-Agent:")
    print(f" {real_user_agent}")
    print()

    # Services to query.
    test_services = [
        "https://httpbin.org/headers",
        "https://api.ipify.org?format=json",
        "https://ipinfo.io/json",
        "https://ifconfig.me/all.json",
    ]

    # Proxy configuration.
    proxy_config = config.get('proxy')
    if proxy_config:
        # Percent-encode the credentials so characters such as '@', ':' or
        # '/' cannot corrupt the proxy URL.
        username = quote(str(proxy_config['username']), safe='')
        password = quote(str(proxy_config['password']), safe='')
        proxy_url = f"http://{username}:{password}@{proxy_config['host']}:{proxy_config['port']}"
        proxies = {
            'http': proxy_url,
            'https': proxy_url,
        }
        print(f"🌐 使用代理: {proxy_config['host']}:{proxy_config['port']}")
    else:
        proxies = None
        print("🏠 不使用代理")
    print()

    last_index = len(test_services) - 1

    # Query each service in turn.
    for i, service in enumerate(test_services):
        print(f"🔍 测试服务 {i+1}: {service}")

        # The context manager closes the session's connections after the
        # request instead of leaving them open.
        with requests.Session() as session:
            if proxies:
                session.proxies = proxies

            # Browser-like request headers.
            session.headers.update({
                "User-Agent": real_user_agent,
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
                "Accept-Encoding": "gzip, deflate, br",
                "Connection": "keep-alive",
                "Upgrade-Insecure-Requests": "1",
                "Sec-Fetch-Dest": "document",
                "Sec-Fetch-Mode": "navigate",
                "Sec-Fetch-Site": "none",
                "Sec-Fetch-User": "?1",
                "Cache-Control": "max-age=0",
                "DNT": "1",
            })

            try:
                response = session.get(service, timeout=15)
            except requests.exceptions.RequestException as e:
                print(f"❌ 请求异常: {e}")
            else:
                if response.status_code == 200:
                    print(f"✅ 请求成功 ({response.status_code})")
                    try:
                        data = response.json()
                    except ValueError:
                        # ValueError is the common base of the JSON decode
                        # errors, so this also covers decoders other than
                        # the stdlib ``json`` module.
                        print("⚠️ 响应不是JSON格式")
                        print(f"响应内容: {response.text[:200]}...")
                    else:
                        _report_echoed_request(data, real_user_agent)
                else:
                    print(f"❌ 请求失败 ({response.status_code})")

        print("-" * 50)
        # Pause between services only; no request follows the last one.
        if i < last_index:
            time.sleep(2)

    print("\n🎯 诊断建议:")
    print("如果User-Agent被修改可能的原因和解决方案")
    print("1. 代理服务器修改了User-Agent")
    print(" - 联系代理服务商")
    print(" - 尝试更换代理服务器")
    print("2. 代理服务商的反检测策略")
    print(" - 使用更高级的代理服务")
    print(" - 考虑使用住宅代理")
    print("3. 请求被中间件拦截")
    print(" - 检查网络环境")
    print(" - 尝试直连测试")
def test_multiple_user_agents():
    """Print five random profiles from ``RealUserDatabase`` for comparison.

    For each profile the operating system and the User-Agent string are
    printed, followed by a blank line. No network request is made.

    Returns:
        None.
    """
    print("\n🔬 多User-Agent测试")
    print("=" * 50)

    database = RealUserDatabase()

    # Take five profiles; entries are numbered from 1 in the output.
    for number in range(1, 6):
        entry = database.get_random_user_profile()
        agent_string = entry["user_agent"]
        platform_name = entry["operating_system"]
        print(f"🎭 User-Agent {number} ({platform_name}):")
        print(f" {agent_string}")
        print()
if __name__ == "__main__":
    # Script entry point: run the proxy diagnostic first, then list sample
    # User-Agents.
    for _scenario in (test_user_agent_with_proxy, test_multiple_user_agents):
        _scenario()