Files
shualiangv1/target_site_test.py
huangzhenpc 4db06ac517 正式22
2025-07-18 11:23:13 +08:00

218 lines
7.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
目标网站测试工具
专门测试game.586vip.cn的User-Agent处理
"""
import json
import random
import time
from urllib.parse import quote

import requests

from real_user_database import RealUserDatabase
def _count_keyword_hits(text, keywords) -> int:
    """Return how many of *keywords* occur in *text*, compared case-insensitively."""
    # Lower-case the (possibly large) page once instead of once per keyword.
    lowered = text.lower()
    return sum(1 for keyword in keywords if keyword.lower() in lowered)


def _print_response_report(url, response) -> None:
    """Print status, selected headers, redirect info and content heuristics.

    *url* is the URL that was requested; *response* is the object returned by
    ``session.get`` for it.
    """
    print(f"✅ 请求成功 ({response.status_code})")
    print(f"📦 响应大小: {len(response.content)} 字节")
    print(f"🌍 最终URL: {response.url}")

    # Show the response headers that hint at the server stack / cookies.
    for header in ('Server', 'Content-Type', 'Set-Cookie', 'X-Powered-By'):
        if header not in response.headers:
            continue
        value = response.headers[header]
        if header == 'Set-Cookie':
            # Cookies can be long; only the first 100 characters are shown.
            print(f"🍪 {header}: {value[:100]}...")
        else:
            print(f"📋 {header}: {value}")

    # A final URL that differs from the requested one means we were redirected.
    if response.url != url:
        print(f"🔄 发生重定向: {url} -> {response.url}")

    content = response.text
    if not content:
        print("⚠️ 页面内容为空")
        return

    print(f"📄 页面内容长度: {len(content)} 字符")

    # Heuristic: more than 3 login-related keywords suggests a login page.
    login_indicators = ['login', '登录', 'sign in', '验证', 'auth', 'password', '密码']
    login_count = _count_keyword_hits(content, login_indicators)
    if login_count > 3:
        print("⚠️ 可能被重定向到登录页面")
        print(f" 检测到 {login_count} 个登录相关关键词")

    # Heuristic: more than 2 protection-related keywords suggests a challenge page.
    protection_indicators = ['cloudflare', 'ddos', 'protection', 'checking', 'verify', 'bot', 'captcha']
    protection_count = _count_keyword_hits(content, protection_indicators)
    if protection_count > 2:
        print("🛡️ 可能遇到防护页面")
        print(f" 检测到 {protection_count} 个防护相关关键词")

    # Show the beginning of the page for manual inspection.
    print("📝 页面开头内容:")
    print(f" {content[:300]}...")


def test_target_site() -> None:
    """Probe the target site with a browser-like User-Agent and print diagnostics.

    Reads ``config.json`` from the current working directory, picks a random
    profile from ``RealUserDatabase``, then issues one GET per target URL
    (optionally through the proxy described by the ``proxy`` config entry) and
    prints what came back. Nothing is returned; all results go to stdout.

    If ``config.json`` is missing or is not valid JSON, an error line is
    printed and the function returns without making any request.
    """
    print("🎯 目标网站测试工具")
    print("=" * 50)

    # Load configuration.
    try:
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        print("❌ 配置文件config.json不存在")
        return
    except json.JSONDecodeError as e:
        # A malformed file is reported the same way as a missing one
        # instead of crashing with a traceback.
        print(f"❌ 配置文件config.json格式错误: {e}")
        return

    # Pick a realistic user profile.
    user_db = RealUserDatabase()
    profile = user_db.get_random_user_profile()
    real_user_agent = profile["user_agent"]
    print("📱 使用的User-Agent:")
    print(f" {real_user_agent}")
    print()

    # Proxy configuration.
    proxy_config = config.get('proxy')
    if proxy_config:
        # Percent-encode the credentials so characters such as '@', ':' or '/'
        # cannot corrupt the proxy URL. str() keeps non-string values working
        # as they did with plain f-string interpolation.
        username = quote(str(proxy_config['username']), safe='')
        password = quote(str(proxy_config['password']), safe='')
        proxy_url = f"http://{username}:{password}@{proxy_config['host']}:{proxy_config['port']}"
        proxies = {
            'http': proxy_url,
            'https': proxy_url,
        }
        print(f"🌐 使用代理: {proxy_config['host']}:{proxy_config['port']}")
    else:
        proxies = None
        print("🏠 不使用代理")
    print()

    # Full set of browser-like request headers; identical for every URL, so
    # it is built once outside the loop.
    headers = {
        "User-Agent": real_user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Cache-Control": "max-age=0",
        "DNT": "1",
        "Pragma": "no-cache",
    }

    # Target URLs to probe.
    target_urls = [
        "https://game.586vip.cn/",
        "https://game.586vip.cn/games/2048/index.html",
    ]
    for url in target_urls:
        print(f"🔍 测试目标网站: {url}")
        # A fresh session per URL (no shared cookies); the context manager
        # closes its connection pool when the request is done.
        with requests.Session() as session:
            if proxies:
                session.proxies = proxies
            session.headers.update(headers)
            try:
                response = session.get(url, timeout=15, allow_redirects=True)
                _print_response_report(url, response)
            except requests.exceptions.RequestException as e:
                print(f"❌ 请求失败: {e}")
        print("-" * 70)
        time.sleep(3)  # avoid hammering the site with back-to-back requests

    print("\n🎯 分析结果:")
    print("如果宝塔显示'WanScannerBot/1.1',可能的原因:")
    print("1. 网站使用了CDN或反向代理修改了User-Agent")
    print("2. 网站的防护系统检测到某些特征,标记为爬虫")
    print("3. 宝塔的日志记录有问题")
    print("4. 网站服务器端的中间件修改了User-Agent")
    print()
    print("建议解决方案:")
    print("1. 尝试更换不同的User-Agent")
    print("2. 增加更多真实浏览器特征")
    print("3. 模拟更真实的访问行为")
    print("4. 检查网站的防护机制")
def test_different_user_agents() -> None:
    """Request the target site with three random User-Agents and print the outcome.

    Reads ``config.json`` from the current working directory, then for each of
    three random profiles from ``RealUserDatabase`` issues one GET to the
    target URL (optionally through the proxy described by the ``proxy`` config
    entry) and prints the status code and body size. Nothing is returned.

    If ``config.json`` is missing or is not valid JSON, an error line is
    printed and the function returns without making any request.
    """
    print("\n🔬 多User-Agent目标网站测试")
    print("=" * 50)

    # Load configuration.
    try:
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        print("❌ 配置文件config.json不存在")
        return
    except json.JSONDecodeError as e:
        # A malformed file is reported the same way as a missing one
        # instead of crashing with a traceback.
        print(f"❌ 配置文件config.json格式错误: {e}")
        return

    # Proxy configuration.
    proxy_config = config.get('proxy')
    if proxy_config:
        # Percent-encode the credentials so characters such as '@', ':' or '/'
        # cannot corrupt the proxy URL. str() keeps non-string values working
        # as they did with plain f-string interpolation.
        username = quote(str(proxy_config['username']), safe='')
        password = quote(str(proxy_config['password']), safe='')
        proxy_url = f"http://{username}:{password}@{proxy_config['host']}:{proxy_config['port']}"
        proxies = {
            'http': proxy_url,
            'https': proxy_url,
        }
    else:
        proxies = None

    # Headers shared by every attempt; only the User-Agent varies.
    base_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    # Try several different User-Agents against the same URL.
    user_db = RealUserDatabase()
    target_url = "https://game.586vip.cn/"
    for attempt in range(1, 4):
        profile = user_db.get_random_user_profile()
        ua = profile["user_agent"]
        os_info = profile["operating_system"]
        print(f"🎭 测试User-Agent {attempt} ({os_info}):")
        print(f" {ua}")

        # A fresh session per attempt; the context manager closes its
        # connection pool when the request is done.
        with requests.Session() as session:
            if proxies:
                session.proxies = proxies
            session.headers.update({"User-Agent": ua, **base_headers})
            try:
                response = session.get(target_url, timeout=10)
                body_size = len(response.content)
                print(f" ✅ 状态码: {response.status_code}")
                print(f" 📦 响应大小: {body_size} 字节")
                # Rough sanity check: HTTP 200 with a body larger than 1000 bytes.
                if response.status_code == 200 and body_size > 1000:
                    print(" 👍 访问正常")
                else:
                    print(" ⚠️ 访问可能异常")
            except requests.exceptions.RequestException as e:
                print(f" ❌ 请求失败: {e}")
        print()
        time.sleep(2)  # pause between attempts
if __name__ == "__main__":
    # When executed as a script, run both diagnostics in order.
    for run_check in (test_target_site, test_different_user_agents):
        run_check()