shualiangv1/target_site_test.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
目标网站测试工具
专门测试game.586vip.cn的User-Agent处理
"""

import requests
import json
import time
import random
from real_user_database import RealUserDatabase

def _load_probe_config(path='config.json'):
    """Load the JSON configuration file used by the probe.

    Returns the parsed top-level object as a dict. Prints a diagnostic and
    returns ``None`` when the file is missing, cannot be decoded as
    UTF-8/JSON, or does not contain a JSON object at the top level.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        print("❌ 配置文件config.json不存在")
        return None
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses, so this covers malformed JSON and bad encodings.
        print(f"❌ 配置文件config.json格式错误: {e}")
        return None

    if not isinstance(config, dict):
        # The caller uses config.get(...), which needs a mapping.
        print("❌ 配置文件config.json格式错误: 顶层必须是JSON对象")
        return None
    return config


def _build_proxy_map(proxy_config):
    """Build the ``proxies`` mapping for requests from *proxy_config*.

    Reads the ``username``, ``password``, ``host`` and ``port`` entries.

    Raises:
        KeyError: if one of those entries is missing.
        TypeError: if *proxy_config* does not support string-key lookup.
    """
    # NOTE: the credentials are interpolated verbatim, so values containing
    # URL-reserved characters (e.g. '@', ':', '/') must already be
    # percent-encoded in config.json.
    proxy_url = (
        f"http://{proxy_config['username']}:{proxy_config['password']}"
        f"@{proxy_config['host']}:{proxy_config['port']}"
    )
    return {'http': proxy_url, 'https': proxy_url}


def _report_response(url, response):
    """Print a summary of *response* obtained for the requested *url*."""
    print(f"✅ 请求成功 ({response.status_code})")
    print(f"📦 响应大小: {len(response.content)} 字节")
    print(f"🌍 最终URL: {response.url}")

    # Show a few response headers that help identify the serving stack.
    for header in ('Server', 'Content-Type', 'Set-Cookie', 'X-Powered-By'):
        if header not in response.headers:
            continue
        value = response.headers[header]
        if header == 'Set-Cookie':
            # Cookies can be long; only the first 100 characters are shown.
            print(f"🍪 {header}: {value[:100]}...")
        else:
            print(f"📋 {header}: {value}")

    # A final URL different from the requested one means we were redirected.
    if response.url != url:
        print(f"🔄 发生重定向: {url} -> {response.url}")

    content = response.text
    if not content:
        print("⚠️ 页面内容为空")
        return

    print(f"📄 页面内容长度: {len(content)} 字符")

    # Lower-case the body once; every keyword below is already lower-case.
    lowered = content.lower()

    # Heuristic: many login-related keywords suggest a login page.
    login_indicators = ('login', '登录', 'sign in', '验证', 'auth', 'password', '密码')
    login_count = sum(1 for keyword in login_indicators if keyword in lowered)
    if login_count > 3:
        print("⚠️ 可能被重定向到登录页面")
        print(f"   检测到 {login_count} 个登录相关关键词")

    # Heuristic: several protection-related keywords suggest a challenge page.
    protection_indicators = ('cloudflare', 'ddos', 'protection', 'checking', 'verify', 'bot', 'captcha')
    protection_count = sum(1 for keyword in protection_indicators if keyword in lowered)
    if protection_count > 2:
        print("🛡️ 可能遇到防护页面")
        print(f"   检测到 {protection_count} 个防护相关关键词")

    # Show the beginning of the page for manual inspection.
    print("📝 页面开头内容:")
    print(f"   {content[:300]}...")


def test_target_site():
    """Probe the target site with one random user profile and print a report.

    Loads ``config.json`` from the current working directory, optionally
    routes traffic through the proxy described by its ``proxy`` entry, then
    requests each target URL with browser-like headers and prints the status,
    selected response headers, redirect information and simple keyword
    heuristics for login / protection pages.

    Returns ``None`` in all cases. Configuration problems (missing or
    malformed file, incomplete proxy settings) and request failures are
    reported on stdout instead of being raised.
    """
    print("🎯 目标网站测试工具")
    print("=" * 50)

    config = _load_probe_config()
    if config is None:
        return

    # Validate the proxy settings up front so a bad config fails fast,
    # before any profile is generated or any request is sent.
    proxy_config = config.get('proxy')
    proxies = None
    if proxy_config:
        try:
            proxies = _build_proxy_map(proxy_config)
        except (KeyError, TypeError) as e:
            print(f"❌ 代理配置不完整或格式错误: {e}")
            return

    # Pick a random user profile; only its User-Agent string is used here.
    user_db = RealUserDatabase()
    profile = user_db.get_random_user_profile()
    real_user_agent = profile["user_agent"]

    print("📱 使用的User-Agent:")
    print(f"   {real_user_agent}")
    print()

    if proxies:
        print(f"🌐 使用代理: {proxy_config['host']}:{proxy_config['port']}")
    else:
        print("🏠 不使用代理")

    print()

    target_urls = [
        "https://game.586vip.cn/",
        "https://game.586vip.cn/games/2048/index.html",
    ]

    # The header set is identical for every URL, so build it once.
    # NOTE(review): "br" is advertised in Accept-Encoding; requests only
    # decodes Brotli bodies when a Brotli package is installed — confirm the
    # runtime has one, otherwise the keyword checks run on undecoded data.
    headers = {
        "User-Agent": real_user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Cache-Control": "max-age=0",
        "DNT": "1",
        "Pragma": "no-cache",
    }

    for url in target_urls:
        print(f"🔍 测试目标网站: {url}")

        # A new session per URL (so no cookies are shared between probes);
        # the with-block closes its connection pool when the probe is done.
        with requests.Session() as session:
            if proxies:
                session.proxies = proxies
            session.headers.update(headers)

            try:
                response = session.get(url, timeout=15, allow_redirects=True)
            except requests.exceptions.RequestException as e:
                print(f"❌ 请求失败: {e}")
            else:
                _report_response(url, response)

        print("-" * 70)
        time.sleep(3)  # throttle: keep consecutive requests apart

    print("\n🎯 分析结果:")
    print("如果宝塔显示'WanScannerBot/1.1'，可能的原因：")
    print("1. 网站使用了CDN或反向代理，修改了User-Agent")
    print("2. 网站的防护系统检测到某些特征，标记为爬虫")
    print("3. 宝塔的日志记录有问题")
    print("4. 网站服务器端的中间件修改了User-Agent")
    print()
    print("建议解决方案：")
    print("1. 尝试更换不同的User-Agent")
    print("2. 增加更多真实浏览器特征")
    print("3. 模拟更真实的访问行为")
    print("4. 检查网站的防护机制")

def test_different_user_agents():
    """Request the target home page with three random User-Agent profiles.

    Loads ``config.json`` from the current working directory, optionally
    routes traffic through the proxy described by its ``proxy`` entry, then
    performs three requests, each with a freshly drawn profile, and prints
    the status code, the response size and a coarse "normal / abnormal"
    verdict (HTTP 200 with more than 1000 bytes counts as normal).

    Returns ``None`` in all cases. Configuration problems (missing or
    malformed file, incomplete proxy settings) and request failures are
    reported on stdout instead of being raised.
    """
    print("\n🔬 多User-Agent目标网站测试")
    print("=" * 50)

    # Load the configuration.
    try:
        with open('config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        print("❌ 配置文件config.json不存在")
        return
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses, so this covers malformed JSON and bad encodings.
        print(f"❌ 配置文件config.json格式错误: {e}")
        return

    if not isinstance(config, dict):
        # config.get(...) below needs a mapping.
        print("❌ 配置文件config.json格式错误: 顶层必须是JSON对象")
        return

    # Proxy settings. NOTE: credentials are interpolated verbatim, so values
    # containing URL-reserved characters must already be percent-encoded.
    proxy_config = config.get('proxy')
    proxies = None
    if proxy_config:
        try:
            proxy_url = (
                f"http://{proxy_config['username']}:{proxy_config['password']}"
                f"@{proxy_config['host']}:{proxy_config['port']}"
            )
        except (KeyError, TypeError) as e:
            # Missing field, or 'proxy' is not a mapping.
            print(f"❌ 代理配置不完整或格式错误: {e}")
            return
        proxies = {
            'http': proxy_url,
            'https': proxy_url,
        }

    user_db = RealUserDatabase()
    target_url = "https://game.586vip.cn/"

    for attempt in range(1, 4):
        profile = user_db.get_random_user_profile()
        ua = profile["user_agent"]
        os_info = profile["operating_system"]

        print(f"🎭 测试User-Agent {attempt} ({os_info}):")
        print(f"   {ua}")

        # A new session per profile; the with-block closes its connection
        # pool once the request has been handled.
        with requests.Session() as session:
            if proxies:
                session.proxies = proxies

            # NOTE(review): "br" is advertised; requests only decodes Brotli
            # bodies when a Brotli package is installed — confirm.
            session.headers.update({
                "User-Agent": ua,
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
                "Accept-Encoding": "gzip, deflate, br",
                "Connection": "keep-alive",
            })

            try:
                response = session.get(target_url, timeout=10)
            except requests.exceptions.RequestException as e:
                print(f"   ❌ 请求失败: {e}")
            else:
                body_size = len(response.content)
                print(f"   ✅ 状态码: {response.status_code}")
                print(f"   📦 响应大小: {body_size} 字节")

                # Coarse sanity check: a 200 with a non-trivial body.
                if response.status_code == 200 and body_size > 1000:
                    print("   👍 访问正常")
                else:
                    print("   ⚠️ 访问可能异常")

        print()
        time.sleep(2)  # throttle: keep consecutive requests apart

# Script entry point: run the single-profile probe first, then the
# multi-User-Agent comparison, in that order.
if __name__ == "__main__":
    for run_check in (test_target_site, test_different_user_agents):
        run_check()