正式22
This commit is contained in:
218
target_site_test.py
Normal file
218
target_site_test.py
Normal file
@@ -0,0 +1,218 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
目标网站测试工具
|
||||
专门测试game.586vip.cn的User-Agent处理
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
import random
|
||||
from real_user_database import RealUserDatabase
|
||||
|
||||
def test_target_site():
    """Probe the target site with a browser-like header set and print a report.

    Reads ``config.json`` from the current working directory, draws one random
    profile from ``RealUserDatabase`` and requests every target URL with that
    profile's User-Agent, optionally through the proxy described under the
    config's ``proxy`` key (``username``, ``password``, ``host``, ``port``).

    For each URL the status code, body size, final URL, a few response
    headers, keyword-based login / anti-bot heuristics and the first 300
    characters of the page are printed. A fixed list of possible causes and
    suggestions is printed at the end.

    Returns:
        None. Everything is reported on stdout. The function returns early,
        with a message, when ``config.json`` is missing, unreadable as JSON
        or not a JSON object.
    """
    # Function-scope import: only the proxy URL construction below needs it.
    from urllib.parse import quote

    print("🎯 目标网站测试工具")
    print("=" * 50)

    # Load configuration. ValueError covers json.JSONDecodeError as well as
    # UnicodeDecodeError, so a broken file ends the run with a message
    # instead of a traceback, just like a missing file does.
    try:
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        print("❌ 配置文件config.json不存在")
        return
    except ValueError as e:
        print(f"❌ 配置文件config.json格式错误: {e}")
        return
    if not isinstance(config, dict):
        # config.get() below requires a JSON object at the top level.
        print("❌ 配置文件config.json格式错误: 顶层必须是JSON对象")
        return

    # Pick one random user profile; its User-Agent is used for every request.
    user_db = RealUserDatabase()
    profile = user_db.get_random_user_profile()
    real_user_agent = profile["user_agent"]

    print("📱 使用的User-Agent:")
    print(f" {real_user_agent}")
    print()

    # Proxy configuration
    proxy_config = config.get('proxy')
    if proxy_config:
        # Percent-encode the credentials so characters such as '@', ':' or '/'
        # inside them cannot be mistaken for URL delimiters.
        username = quote(str(proxy_config['username']), safe='')
        password = quote(str(proxy_config['password']), safe='')
        proxy_url = f"http://{username}:{password}@{proxy_config['host']}:{proxy_config['port']}"
        proxies = {
            'http': proxy_url,
            'https': proxy_url
        }
        print(f"🌐 使用代理: {proxy_config['host']}:{proxy_config['port']}")
    else:
        proxies = None
        print("🏠 不使用代理")

    print()

    # URLs to probe
    target_urls = [
        "https://game.586vip.cn/",
        "https://game.586vip.cn/games/2048/index.html",
    ]

    # Browser-like request headers; identical for every URL, so built once.
    # NOTE(review): advertising "br" lets the server answer with Brotli, which
    # requests presumably only decodes when a Brotli package is installed -
    # confirm the environment, otherwise the keyword checks below would run
    # on undecoded data.
    headers = {
        "User-Agent": real_user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Cache-Control": "max-age=0",
        "DNT": "1",
        "Pragma": "no-cache",
    }

    # Keywords whose presence hints at a login page / an anti-bot page.
    login_indicators = ['login', '登录', 'sign in', '验证', 'auth', 'password', '密码']
    protection_indicators = ['cloudflare', 'ddos', 'protection', 'checking', 'verify', 'bot', 'captcha']
    important_headers = ['Server', 'Content-Type', 'Set-Cookie', 'X-Powered-By']

    for url in target_urls:
        print(f"🔍 测试目标网站: {url}")

        # One fresh session per URL, as before; the context manager closes
        # the session (and its pooled connections) once the probe is done.
        with requests.Session() as session:
            if proxies:
                session.proxies = proxies
            session.headers.update(headers)

            try:
                response = session.get(url, timeout=15, allow_redirects=True)

                print(f"✅ 请求成功 ({response.status_code})")
                print(f"📦 响应大小: {len(response.content)} 字节")
                print(f"🌍 最终URL: {response.url}")

                # Show selected response headers (cookies are truncated).
                for header in important_headers:
                    if header in response.headers:
                        value = response.headers[header]
                        if header == 'Set-Cookie':
                            print(f"🍪 {header}: {value[:100]}...")
                        else:
                            print(f"📋 {header}: {value}")

                # A final URL that differs from the requested one means we
                # were redirected.
                if response.url != url:
                    print(f"🔄 发生重定向: {url} -> {response.url}")

                # Inspect the page body
                content = response.text
                if content:
                    print(f"📄 页面内容长度: {len(content)} 字符")

                    # Lower-case the body once and reuse it for both scans.
                    lowered = content.lower()

                    login_count = sum(1 for indicator in login_indicators if indicator in lowered)
                    if login_count > 3:
                        print("⚠️ 可能被重定向到登录页面")
                        print(f" 检测到 {login_count} 个登录相关关键词")

                    protection_count = sum(1 for indicator in protection_indicators if indicator in lowered)
                    if protection_count > 2:
                        print("🛡️ 可能遇到防护页面")
                        print(f" 检测到 {protection_count} 个防护相关关键词")

                    # Show the beginning of the page
                    print("📝 页面开头内容:")
                    print(f" {content[:300]}...")
                else:
                    print("⚠️ 页面内容为空")

            except requests.exceptions.RequestException as e:
                print(f"❌ 请求失败: {e}")

        print("-" * 70)
        time.sleep(3)  # avoid hammering the site

    print("\n🎯 分析结果:")
    print("如果宝塔显示'WanScannerBot/1.1',可能的原因:")
    print("1. 网站使用了CDN或反向代理,修改了User-Agent")
    print("2. 网站的防护系统检测到某些特征,标记为爬虫")
    print("3. 宝塔的日志记录有问题")
    print("4. 网站服务器端的中间件修改了User-Agent")
    print()
    print("建议解决方案:")
    print("1. 尝试更换不同的User-Agent")
    print("2. 增加更多真实浏览器特征")
    print("3. 模拟更真实的访问行为")
    print("4. 检查网站的防护机制")
|
||||
|
||||
def test_different_user_agents():
    """Request the target site with three random User-Agents and compare results.

    Reads ``config.json`` from the current working directory for the optional
    ``proxy`` settings (``username``, ``password``, ``host``, ``port``), then
    draws three random profiles from ``RealUserDatabase`` and fetches the
    target URL once per profile. Status code and body size are printed, and a
    response counts as normal when it is HTTP 200 with more than 1000 bytes.

    Returns:
        None. Everything is reported on stdout. The function returns early,
        with a message, when ``config.json`` is missing, unreadable as JSON
        or not a JSON object.
    """
    # Function-scope import: only the proxy URL construction below needs it.
    from urllib.parse import quote

    print("\n🔬 多User-Agent目标网站测试")
    print("=" * 50)

    # Load configuration. ValueError covers json.JSONDecodeError as well as
    # UnicodeDecodeError, so a broken file is reported like a missing one.
    try:
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        print("❌ 配置文件config.json不存在")
        return
    except ValueError as e:
        print(f"❌ 配置文件config.json格式错误: {e}")
        return
    if not isinstance(config, dict):
        # config.get() below requires a JSON object at the top level.
        print("❌ 配置文件config.json格式错误: 顶层必须是JSON对象")
        return

    # Proxy configuration
    proxy_config = config.get('proxy')
    if proxy_config:
        # Percent-encode the credentials so characters such as '@', ':' or '/'
        # inside them cannot be mistaken for URL delimiters.
        username = quote(str(proxy_config['username']), safe='')
        password = quote(str(proxy_config['password']), safe='')
        proxy_url = f"http://{username}:{password}@{proxy_config['host']}:{proxy_config['port']}"
        proxies = {
            'http': proxy_url,
            'https': proxy_url
        }
    else:
        proxies = None

    user_db = RealUserDatabase()
    target_url = "https://game.586vip.cn/"

    # Headers shared by every attempt; only the User-Agent varies.
    base_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    for i in range(3):
        profile = user_db.get_random_user_profile()
        ua = profile["user_agent"]
        os_info = profile["operating_system"]

        print(f"🎭 测试User-Agent {i+1} ({os_info}):")
        print(f" {ua}")

        # One fresh session per User-Agent, as before; the context manager
        # closes the session (and its pooled connections) afterwards.
        with requests.Session() as session:
            if proxies:
                session.proxies = proxies
            session.headers.update({"User-Agent": ua, **base_headers})

            try:
                response = session.get(target_url, timeout=10)
                print(f" ✅ 状态码: {response.status_code}")
                print(f" 📦 响应大小: {len(response.content)} 字节")

                # Rough sanity check: HTTP 200 with a non-trivial body.
                if response.status_code == 200 and len(response.content) > 1000:
                    print(" 👍 访问正常")
                else:
                    print(" ⚠️ 访问可能异常")

            except requests.exceptions.RequestException as e:
                print(f" ❌ 请求失败: {e}")

        print()
        time.sleep(2)  # pause between attempts
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the single-profile probe first, then the
    # comparison across several random User-Agents.
    for _check in (test_target_site, test_different_user_agents):
        _check()
|
||||
164
user_agent_diagnostic.py
Normal file
164
user_agent_diagnostic.py
Normal file
@@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
User-Agent诊断工具
|
||||
检查代理是否修改了User-Agent头部
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
import random
|
||||
from real_user_database import RealUserDatabase
|
||||
|
||||
def test_user_agent_with_proxy():
    """Check whether the User-Agent we send arrives unchanged at echo services.

    Reads ``config.json`` from the current working directory, draws one random
    profile from ``RealUserDatabase`` and requests a list of public echo
    services with that profile's User-Agent, optionally through the proxy
    described under the config's ``proxy`` key (``username``, ``password``,
    ``host``, ``port``).

    When a JSON response contains ``headers["User-Agent"]``, it is compared
    with the value that was sent. An ``ip`` / ``origin`` field and a few
    echoed request headers are printed when present. A fixed list of
    suggestions is printed at the end.

    Returns:
        None. Everything is reported on stdout. The function returns early,
        with a message, when ``config.json`` is missing, unreadable as JSON
        or not a JSON object.
    """
    # Function-scope import: only the proxy URL construction below needs it.
    from urllib.parse import quote

    print("🔍 User-Agent诊断工具")
    print("=" * 50)

    # Load configuration. ValueError covers json.JSONDecodeError as well as
    # UnicodeDecodeError, so a broken file is reported like a missing one.
    try:
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        print("❌ 配置文件config.json不存在")
        return
    except ValueError as e:
        print(f"❌ 配置文件config.json格式错误: {e}")
        return
    if not isinstance(config, dict):
        # config.get() below requires a JSON object at the top level.
        print("❌ 配置文件config.json格式错误: 顶层必须是JSON对象")
        return

    # Pick one random user profile; its User-Agent is used for every request.
    user_db = RealUserDatabase()
    profile = user_db.get_random_user_profile()
    real_user_agent = profile["user_agent"]

    print("📱 生成的真实User-Agent:")
    print(f" {real_user_agent}")
    print()

    # Echo services to query
    test_services = [
        "https://httpbin.org/headers",
        "https://api.ipify.org?format=json",
        "https://ipinfo.io/json",
        "https://ifconfig.me/all.json",
    ]

    # Proxy configuration
    proxy_config = config.get('proxy')
    if proxy_config:
        # Percent-encode the credentials so characters such as '@', ':' or '/'
        # inside them cannot be mistaken for URL delimiters.
        username = quote(str(proxy_config['username']), safe='')
        password = quote(str(proxy_config['password']), safe='')
        proxy_url = f"http://{username}:{password}@{proxy_config['host']}:{proxy_config['port']}"
        proxies = {
            'http': proxy_url,
            'https': proxy_url
        }
        print(f"🌐 使用代理: {proxy_config['host']}:{proxy_config['port']}")
    else:
        proxies = None
        print("🏠 不使用代理")

    print()

    # Browser-like request headers; identical for every service, so built once.
    headers = {
        "User-Agent": real_user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Cache-Control": "max-age=0",
        "DNT": "1",
    }
    echoed_headers_to_show = ['Accept', 'Accept-Language', 'Accept-Encoding']

    for i, service in enumerate(test_services):
        print(f"🔍 测试服务 {i+1}: {service}")

        # One fresh session per service, as before; the context manager
        # closes the session (and its pooled connections) afterwards.
        with requests.Session() as session:
            if proxies:
                session.proxies = proxies
            session.headers.update(headers)

            try:
                response = session.get(service, timeout=15)

                if response.status_code == 200:
                    print(f"✅ 请求成功 ({response.status_code})")

                    # Only the decode itself is guarded. ValueError is the
                    # common base of the JSON decode errors response.json()
                    # can raise, whichever JSON backend requests uses.
                    try:
                        data = response.json()
                    except ValueError:
                        print("⚠️ 响应不是JSON格式")
                        print(f"响应内容: {response.text[:200]}...")
                    else:
                        # Anything other than a JSON object has no fields to
                        # inspect; treat it as empty instead of crashing.
                        if not isinstance(data, dict):
                            data = {}
                        received_headers = data.get('headers')
                        if not isinstance(received_headers, dict):
                            received_headers = {}

                        # Compare the echoed User-Agent with what was sent.
                        if 'User-Agent' in received_headers:
                            received_ua = received_headers['User-Agent']
                            print("📱 服务器收到的User-Agent:")
                            print(f" {received_ua}")

                            if received_ua == real_user_agent:
                                print("✅ User-Agent正确传递")
                            else:
                                print("❌ User-Agent被修改了!")
                                print(f" 期望: {real_user_agent}")
                                print(f" 实际: {received_ua}")

                        # Show the caller IP as reported by the service.
                        if 'ip' in data:
                            print(f"🌍 IP地址: {data['ip']}")
                        elif 'origin' in data:
                            print(f"🌍 IP地址: {data['origin']}")

                        # Show a few other request headers the service echoed.
                        for header in echoed_headers_to_show:
                            if header in received_headers:
                                print(f"📋 {header}: {received_headers[header]}")
                else:
                    print(f"❌ 请求失败 ({response.status_code})")

            except requests.exceptions.RequestException as e:
                print(f"❌ 请求异常: {e}")

        print("-" * 50)
        time.sleep(2)  # avoid hammering the services

    print("\n🎯 诊断建议:")
    print("如果User-Agent被修改,可能的原因和解决方案:")
    print("1. 代理服务器修改了User-Agent")
    print(" - 联系代理服务商")
    print(" - 尝试更换代理服务器")
    print("2. 代理服务商的反检测策略")
    print(" - 使用更高级的代理服务")
    print(" - 考虑使用住宅代理")
    print("3. 请求被中间件拦截")
    print(" - 检查网络环境")
    print(" - 尝试直连测试")
|
||||
|
||||
def test_multiple_user_agents():
    """Print five randomly drawn user profiles for visual inspection.

    For each profile obtained from ``RealUserDatabase`` the operating system
    and the User-Agent string are printed. No network requests are made.
    """
    sample_count = 5

    print("\n🔬 多User-Agent测试")
    print("=" * 50)

    database = RealUserDatabase()

    # Number the samples from 1 so the index can be printed directly.
    for number in range(1, sample_count + 1):
        user = database.get_random_user_profile()
        agent_string = user["user_agent"]
        platform = user["operating_system"]

        print(f"🎭 User-Agent {number} ({platform}):")
        print(f" {agent_string}")
        print()
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the proxy diagnosis first, then list a few
    # sample User-Agents.
    for _check in (test_user_agent_with_proxy, test_multiple_user_agents):
        _check()
|
||||
Reference in New Issue
Block a user