# shualiangv1/website_traffic_bot_final.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
网站流量模拟脚本 (最终版本)
使用真实用户数据库模拟最真实的访问轨迹
支持Google搜索来源和真实网站跳转
"""

import requests
import time
import random
import json
import os
import logging
from urllib.parse import urlparse, urljoin
import re
from real_user_database import RealUserDatabase

# Configure logging: INFO level, timestamped records, sent both to a UTF-8
# encoded log file (traffic_bot_final.log) and to a StreamHandler (console).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('traffic_bot_final.log', encoding='utf-8'),
        logging.StreamHandler()
    ]
)
# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)

class WebTrafficBotFinal:
    def __init__(self, config_file='config.json'):
        """
        Create the bot from a JSON configuration file.

        Loads the configuration, instantiates the user database and prepares
        the pool of source (referrer) pages. The HTTP session, the user
        profile and the visit behavior start as None; setup_session() fills
        them in.

        Args:
            config_file: Path of the JSON configuration file.
        """
        # Search-engine result pages (listed first in the pool).
        search_sources = [
            "https://www.google.com/search?q=2048+game+online",
            "https://www.google.com/search?q=html5+games",
            "https://www.google.com/search?q=browser+games+2048",
            "https://www.google.com/search?q=free+online+games",
            "https://www.google.com/search?q=puzzle+games+online",
            "https://www.google.com/search?q=数字游戏+2048",
            "https://www.google.com/search?q=在线小游戏",
        ]
        # Sites explicitly requested by the user.
        user_specified_sources = [
            "https://github.com/chengazhen/cursor-auto-free",
            "https://linux.do/",
        ]
        # Other source sites.
        other_sources = [
            "https://github.com/trending",
            "https://github.com/topics/game",
            "https://news.ycombinator.com/",
            "https://www.reddit.com/r/WebGames/",
            "https://www.reddit.com/r/incremental_games/",
            "https://www.producthunt.com/",
            "https://stackoverflow.com/",
            "https://www.zhihu.com/",
            "https://v2ex.com/",
            "https://segmentfault.com/",
            "https://juejin.cn/",
            "https://www.csdn.net/",
            "https://www.oschina.net/",
            "https://gitee.com/",
        ]

        self.config = self.load_config(config_file)
        self.user_db = RealUserDatabase()

        # Per-visit state, populated by setup_session().
        self.session = None
        self.current_profile = None
        self.current_behavior = None

        # Order is preserved: search pages, user-specified sites, the rest.
        self.traffic_sources = search_sources + user_specified_sources + other_sources

    def load_config(self, config_file):
        """
        Read and parse the JSON configuration file.

        Args:
            config_file: Path of the file to read (opened as UTF-8).

        Returns:
            The parsed JSON content.

        Raises:
            FileNotFoundError: The file does not exist (logged, then re-raised).
            json.JSONDecodeError: The file is not valid JSON (logged, then
                re-raised).
        """
        try:
            with open(config_file, 'r', encoding='utf-8') as handle:
                parsed = json.load(handle)
        except FileNotFoundError:
            logger.error(f"配置文件未找到: {config_file}")
            raise
        except json.JSONDecodeError as parse_error:
            logger.error(f"配置文件格式错误: {parse_error}")
            raise
        else:
            logger.info(f"配置文件加载成功: {config_file}")
            return parsed

    def setup_session(self):
        """
        Create a fresh requests session and configure it for one visit.

        Steps:
          1. Ask the user database for a user profile and a visit behavior.
          2. If the configuration has a 'proxy' entry, route both http and
             https traffic through it.
          3. Apply the headers the user database generates for the profile.
          4. Log the profile and print the current outbound IP when it can
             be determined.

        Proxy configuration keys: 'host' and 'port' are required; 'username'
        and 'password' are optional. Credentials are expected in raw form and
        are percent-encoded here, so characters such as '@', ':' or '/' in a
        password no longer corrupt the proxy URL.

        Raises:
            KeyError: The proxy entry lacks 'host' or 'port', or the profile /
                behavior returned by the user database lacks a logged key.
        """
        # Local import: only this method needs it.
        from urllib.parse import quote

        self.session = requests.Session()

        # Generate the user profile and behavior for this visit.
        self.current_profile = self.user_db.get_random_user_profile()
        self.current_behavior = self.user_db.get_visit_behavior()

        # Configure the proxy, if any.
        proxy_config = self.config.get('proxy')
        if proxy_config:
            host = proxy_config['host']
            port = proxy_config['port']
            username = proxy_config.get('username')
            password = proxy_config.get('password')

            if username:
                # safe='' so that every reserved character is escaped.
                credentials = quote(str(username), safe='')
                if password:
                    credentials += ':' + quote(str(password), safe='')
                proxy_url = f"http://{credentials}@{host}:{port}"
            else:
                # Proxy without authentication.
                proxy_url = f"http://{host}:{port}"

            self.session.proxies = {
                'http': proxy_url,
                'https': proxy_url
            }
            logger.info(f"已配置代理: {host}:{port}")

        # Headers generated by the user database for the chosen profile.
        realistic_headers = self.user_db.get_realistic_headers(self.current_profile)
        self.session.headers.update(realistic_headers)

        # Log the characteristics of the chosen profile.
        logger.info("🎭 用户身份配置:")
        logger.info(f"  操作系统: {self.current_profile['operating_system']}")
        logger.info(f"  屏幕分辨率: {self.current_profile['screen_resolution']}")
        logger.info(f"  浏览器语言: {self.current_profile['language']}")
        logger.info(f"  时区: {self.current_profile['timezone']}")
        logger.info(f"  访问时间模式: {self.current_behavior['pattern_type']}")
        logger.info(f"  硬件并发数: {self.current_profile['hardware_concurrency']}")
        logger.info(f"  设备内存: {self.current_profile['device_memory']}GB")

        # Look up and display the current outbound IP.
        current_ip = self.get_current_ip()
        if current_ip:
            print(f"🌍 当前IP地址: {current_ip}")
            print(f"👤 用户身份: {self.current_profile['operating_system']} | {self.current_profile['screen_resolution']} | {self.current_behavior['pattern_type']}")

    def get_current_ip(self):
        """
        Determine the outbound IP address as seen through the current session.

        Queries a fixed list of IP-echo services in order and returns the
        answer of the first one that responds with HTTP 200. The body is
        parsed as JSON; in a JSON object the 'origin' key is preferred, then
        'ip'. A body that is not valid JSON is returned as stripped text.

        Returns:
            The IP as reported by the service, or None when every service
            failed.
        """
        ip_services = [
            'https://httpbin.org/ip',
            'https://api.ipify.org?format=json',
            'https://ipinfo.io/json',
        ]

        for service in ip_services:
            try:
                logger.info(f"🔍 正在获取IP地址: {service}")
                response = self.session.get(service, timeout=10)

                if response.status_code != 200:
                    # Previously skipped silently; record why we move on.
                    logger.warning(f"从 {service} 获取IP失败: HTTP {response.status_code}")
                    continue

                try:
                    data = response.json()
                except ValueError:
                    # JSON decode errors are ValueError subclasses; catching
                    # only those keeps KeyboardInterrupt/SystemExit intact.
                    ip = response.text.strip()
                else:
                    if isinstance(data, dict) and 'origin' in data:
                        ip = data['origin']
                    elif isinstance(data, dict) and 'ip' in data:
                        ip = data['ip']
                    else:
                        ip = str(data)

                logger.info(f"✅ 当前IP地址: {ip}")
                return ip

            except Exception as e:
                # Best-effort lookup: any failure just moves to the next service.
                logger.warning(f"从 {service} 获取IP失败: {e}")

        logger.error("❌ 无法获取当前IP地址")
        return None

    def simulate_realistic_source_visit(self):
        """
        Simulate a stay on a randomly chosen source page, then set it as Referer.

        The source URL is matched by substring ("google.com", "github.com",
        "linux.do", otherwise generic) to choose the base stay-time range and
        the behavior routine. No request is sent to the source page here; the
        routines only log and sleep.

        Returns:
            True on success, False when any exception occurred (it is logged).
        """
        source_page = random.choice(self.traffic_sources)

        try:
            logger.info(f"🔗 模拟从来源访问: {source_page}")

            # Select (base range, log label, routine, extra args) per source kind.
            extra_args = ()
            if "google.com" in source_page:
                # Search pages: short stay.
                bounds, label = (3, 12), "🔍 Google搜索停留"
                simulate = self._simulate_google_search_behavior
            elif "github.com" in source_page:
                # GitHub: medium stay; the routine also needs the URL.
                bounds, label = (8, 25), "🐙 GitHub页面停留"
                simulate = self._simulate_github_behavior
                extra_args = (source_page,)
            elif "linux.do" in source_page:
                # Tech community: longer stay.
                bounds, label = (10, 30), "💻 Linux.do社区停留"
                simulate = self._simulate_community_behavior
            else:
                # Any other site.
                bounds, label = (5, 20), "🌐 其他网站停留"
                simulate = self._simulate_general_browsing

            base_seconds = random.uniform(*bounds)
            stay_time = self.user_db.get_realistic_timing(base_seconds, self.current_behavior)
            logger.info(f"{label} {stay_time:.1f} 秒")
            simulate(stay_time, *extra_args)

            # Referer (and fetch-site) used by the requests that follow.
            self.session.headers.update({
                'Referer': source_page,
                'Sec-Fetch-Site': 'cross-site'
            })
        except Exception as e:
            logger.error(f"来源访问模拟失败: {e}")
            return False

        return True

    def _simulate_google_search_behavior(self, total_time):
        """
        Log and sleep through 2-4 distinct search-page actions.

        Args:
            total_time: Stay time in seconds; divided evenly into per-action
                slices that scale the sleeps (typing uses its own 2-5 s range).
        """
        possible_actions = [
            "输入搜索关键词",
            "查看搜索结果",
            "滚动浏览结果页面",
            "点击相关搜索建议",
            "查看图片搜索结果"
        ]

        # Few actions on a search page.
        step_count = random.randint(2, 4)
        slice_seconds = total_time / step_count
        chosen_actions = random.sample(possible_actions, step_count)

        for position, action in enumerate(chosen_actions):
            if position:
                # "Thinking" gap between consecutive actions.
                time.sleep(self.user_db.simulate_human_delays("thinking"))

            logger.info(f"  🔍 Google行为: {action}")

            if "输入" in action:
                # Typing the query takes a fixed-range amount of time.
                pause = random.uniform(2, 5)
            elif "滚动" in action:
                # Quick scroll through the results.
                pause = slice_seconds * 0.6
            else:
                pause = slice_seconds * 0.8
            time.sleep(pause)

    def _simulate_github_behavior(self, total_time, github_url):
        """
        Log and sleep through 3-6 randomly chosen GitHub page actions.

        Args:
            total_time: Stay time in seconds, split evenly into slices.
            github_url: Source URL; when it contains "cursor-auto-free" the
                repository-specific action list is used, otherwise the
                generic one.
        """
        repository_actions = [
            "查看项目README",
            "阅读项目描述",
            "查看Stars和Forks数量",
            "浏览代码文件",
            "查看Issues讨论",
            "阅读使用说明"
        ]
        generic_actions = [
            "查看trending项目",
            "浏览热门仓库",
            "查看项目描述",
            "阅读README文件",
            "查看代码示例"
        ]
        actions = repository_actions if "cursor-auto-free" in github_url else generic_actions

        step_count = random.randint(3, 6)
        slice_seconds = total_time / step_count

        for position in range(step_count):
            if position:
                time.sleep(self.user_db.simulate_human_delays("reading"))

            action = random.choice(actions)
            logger.info(f"  🐙 GitHub行为: {action}")

            # Reading takes the largest share of a slice, viewing less,
            # anything else the least.
            if "阅读" in action:
                share = 0.7
            elif "查看" in action:
                share = 0.5
            else:
                share = 0.4
            time.sleep(slice_seconds * share)

    def _simulate_community_behavior(self, total_time):
        """
        Log and sleep through 4-7 randomly chosen tech-community actions.

        Args:
            total_time: Stay time in seconds, split evenly into slices.
        """
        actions = [
            "浏览热门帖子",
            "阅读技术讨论",
            "查看最新话题",
            "搜索相关内容",
            "查看用户资料",
            "阅读精华帖子"
        ]

        # Longer stays on a community site, hence more actions.
        step_count = random.randint(4, 7)
        slice_seconds = total_time / step_count

        for position in range(step_count):
            if position:
                time.sleep(self.user_db.simulate_human_delays("reading"))

            action = random.choice(actions)
            logger.info(f"  💻 社区行为: {action}")

            # Reading actions use a larger share of the slice.
            share = 0.8 if "阅读" in action else 0.6
            time.sleep(slice_seconds * share)

    def _simulate_general_browsing(self, total_time):
        """
        Log and sleep through 3-5 randomly chosen generic browsing actions.

        Args:
            total_time: Stay time in seconds, split evenly into slices; each
                sleep is a slice scaled by a random factor in [0.7, 1.2].
        """
        actions = [
            "浏览页面内容",
            "查看导航菜单",
            "滚动阅读文章",
            "点击相关链接",
            "查看评论区"
        ]

        step_count = random.randint(3, 5)
        slice_seconds = total_time / step_count

        for position in range(step_count):
            if position:
                time.sleep(self.user_db.simulate_human_delays("normal"))

            action = random.choice(actions)
            logger.info(f"  🌐 浏览行为: {action}")

            jitter = random.uniform(0.7, 1.2)
            time.sleep(slice_seconds * jitter)

    def visit_main_site_realistic(self):
        """
        Request the configured main site and run the page-browsing simulation.

        Returns:
            True when the page was fetched and the simulation completed,
            False when the request failed or any exception occurred.

        Raises:
            KeyError: config['targets']['main_site'] is missing (the lookup
                happens before the try block).
        """
        main_site = self.config['targets']['main_site']

        try:
            logger.info(f"🏠 访问目标网站: {main_site}")

            response = self.make_realistic_request(main_site)
            if response:
                # Browse the fetched page as the main page.
                self._simulate_realistic_browsing(response, is_main_page=True)
                return True
            return False
        except Exception as e:
            logger.error(f"主网站访问失败: {e}")
            return False

    def visit_game_page_realistic(self):
        """
        Request the configured game page and run the gameplay simulation.

        The Referer header is switched to the main site before the request.
        A page that looks like a login form only triggers a warning; the
        simulation continues regardless.

        Returns:
            True when the page was fetched and the simulation completed,
            False when the request failed or any exception occurred.

        Raises:
            KeyError: a 'targets' entry is missing from the configuration
                (the lookups happen before the try block).
        """
        targets = self.config['targets']
        game_page = targets['game_page']
        main_site = targets['main_site']

        try:
            logger.info(f"🎮 访问游戏页面: {game_page}")

            # The game page is reached from the main site.
            navigation_headers = {
                'Referer': main_site,
                'Sec-Fetch-Site': 'same-origin'
            }
            self.session.headers.update(navigation_headers)

            response = self.make_realistic_request(game_page)
            if not response:
                return False

            login_detected = self._check_login_required(response)
            if login_detected:
                logger.warning("⚠️ 检测到登录页面，继续模拟访问")

            self._simulate_realistic_gaming(response)
        except Exception as e:
            logger.error(f"游戏页面访问失败: {e}")
            return False

        return True

    def _check_login_required(self, response):
        """
        Heuristically decide whether a response looks like a login page.

        Counts how many login-related keywords occur in the lower-cased
        response text; three or more distinct keywords count as a login page.

        Args:
            response: Object with a ``text`` attribute, or a falsy value.

        Returns:
            True when at least three keywords are present, otherwise False
            (also False for a falsy response).
        """
        if not response:
            return False

        page_text = response.text.lower()
        keywords = (
            '登录', 'login', '用户名', 'username', 'password', '密码',
            'signin', 'sign in', '账号', 'account', '验证码',
        )

        hits = 0
        for keyword in keywords:
            if keyword in page_text:
                hits += 1

        if hits < 3:
            return False

        logger.warning("⚠️ 检测到可能的登录页面")
        logger.info("💡 建议：检查网站是否需要登录访问")
        return True

    def make_realistic_request(self, url, timeout=15):
        """
        GET a URL through the session after a short "thinking" delay.

        Logs the status code, body size, elapsed time and, when present, the
        content-type and server headers. Redirects are followed.

        Args:
            url: Address to request.
            timeout: Request timeout in seconds.

        Returns:
            The response on success; None when the request raised a
            requests RequestException (including an HTTP error status
            reported by raise_for_status()).
        """
        try:
            # Pause before sending the request.
            time.sleep(self.user_db.simulate_human_delays("thinking"))

            response = self.session.get(url, timeout=timeout, allow_redirects=True)

            # Request details.
            logger.info("📡 HTTP请求详情:")
            logger.info(f"  📍 访问URL: {url}")
            logger.info(f"  📊 状态码: {response.status_code}")
            logger.info(f"  📦 响应大小: {len(response.content)} 字节")
            logger.info(f"  ⏱️ 响应时间: {response.elapsed.total_seconds():.2f}秒")

            content_type = response.headers.get('content-type')
            if content_type:
                logger.info(f"  📄 内容类型: {content_type}")

            server_name = response.headers.get('server')
            if server_name:
                logger.info(f"  🖥️ 服务器: {server_name}")

            # Turn 4xx/5xx statuses into an exception handled below.
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            logger.error(f"❌ 请求失败 {url}: {e}")
            return None

        return response

    def _simulate_realistic_browsing(self, response, is_main_page=False):
        """
        Derive a stay time for a fetched page and simulate browsing it.

        The text length is measured after stripping anything that looks like
        an HTML tag; the estimated reading time is that length divided by the
        behavior's 'reading_speed', times 60.

        Args:
            response: Response whose ``text`` is the page content.
            is_main_page: When True the base stay time is drawn from the
                configured 'main_site_stay_time' range; otherwise it is the
                estimated reading time capped at 60 seconds.
        """
        page_html = response.text

        # Estimate the amount of text and the time needed to read it.
        visible_chars = len(re.sub(r'<[^>]+>', '', page_html))
        reading_time = visible_chars / self.current_behavior['reading_speed'] * 60

        logger.info("📖 页面分析:")
        logger.info(f"  📝 内容长度: {visible_chars} 字符")
        logger.info(f"  ⏱️ 预估阅读时间: {reading_time:.1f} 秒")

        # Base stay time: configured range for the main page, otherwise the
        # reading time capped at one minute.
        base_time = (
            random.uniform(*self.config['settings']['main_site_stay_time'])
            if is_main_page
            else min(reading_time, 60)
        )
        stay_time = self.user_db.get_realistic_timing(base_time, self.current_behavior)

        logger.info(f"  🕐 实际停留时间: {stay_time:.1f} 秒")

        # Spend the stay time in several browsing segments.
        self._simulate_browsing_segments(stay_time, page_html)

    def _simulate_browsing_segments(self, total_time, content):
        """
        Spend ``total_time`` on a page as 3-8 randomly chosen browsing actions.

        Each action is logged and followed by a sleep. The scrolling action
        delegates to _simulate_scrolling_behavior; the link-checking action
        "hovers" over a few internal links when the page has any.

        Args:
            total_time: Stay time in seconds, split evenly into segments.
            content: Page HTML, scanned for href attributes.
        """
        # Collect links: relative URLs or URLs containing the main site address.
        main_site = self.config['targets']['main_site']
        links = re.findall(r'href=["\'](.*?)["\']', content)
        internal_links = [
            link for link in links
            if not link.startswith('http') or main_site in link
        ]

        segments = random.randint(3, 8)
        segment_time = total_time / segments

        link_check_action = "🔗 检查页面链接"
        browsing_actions = [
            "📖 阅读页面内容",
            "🧭 查看导航菜单",
            "📜 滚动浏览页面",
            link_check_action,
            "👁️ 观察页面布局",
            "📱 查看页脚信息"
        ]

        for _ in range(segments):
            action = random.choice(browsing_actions)
            logger.info(f"  {action}")

            # Adjust the time spent according to the kind of action.
            if "滚动" in action:
                self._simulate_scrolling_behavior(segment_time)
            elif action == link_check_action and internal_links:
                # The previous substring test ("检查链接") could never match
                # the action text, so this branch was unreachable.
                self._simulate_link_hovering(internal_links)
                time.sleep(segment_time * 0.8)
            else:
                actual_segment_time = segment_time * random.uniform(0.7, 1.3)
                time.sleep(actual_segment_time)

    def _simulate_scrolling_behavior(self, duration):
        """
        Simulate scrolling for ``duration`` seconds, split into 2-5 sessions.

        Each session gets an equal share of ``duration`` as its time budget:
        2-4 quick scrolls, an optional reading pause (70% chance), then an
        idle wait for whatever budget is left. Every sleep is capped by the
        remaining budget, so the total time matches ``duration``. Previously
        the per-session share was computed but never used, so the method
        ignored ``duration`` altogether.

        Args:
            duration: Total time in seconds; negative values are treated as 0.
        """
        scroll_sessions = random.randint(2, 5)
        session_budget = max(duration, 0) / scroll_sessions

        for session in range(scroll_sessions):
            logger.info(f"    📜 滚动会话 {session + 1}")
            remaining = session_budget

            # Quick scrolls.
            quick_scrolls = random.randint(2, 4)
            for _ in range(quick_scrolls):
                step = min(random.uniform(0.3, 1.0), remaining)
                time.sleep(step)
                remaining -= step

            # Occasional pause to read.
            if remaining > 0 and random.random() < 0.7:
                pause_time = min(random.uniform(1, 4), remaining)
                logger.info(f"    ⏸️ 停顿阅读 {pause_time:.1f}秒")
                time.sleep(pause_time)
                remaining -= pause_time

            # Use up the rest of this session's budget.
            if remaining > 0:
                time.sleep(remaining)

    def _simulate_link_hovering(self, links):
        """
        Log a look at up to three randomly sampled links, pausing after each.

        Args:
            links: Candidate link strings; an empty list results in no action.
        """
        how_many = min(random.randint(1, 3), len(links))

        for target in random.sample(links, how_many):
            # Only the first 50 characters of the link are shown.
            logger.info(f"    🔗 查看链接: {target[:50]}...")
            time.sleep(random.uniform(0.5, 2.5))

    def _simulate_realistic_gaming(self, response):
        """
        Simulate a stay on the game page: a preparation pause, then gameplay.

        The total game time is a value drawn from the configured
        'game_page_stay_time' range and adjusted by the user database.

        Args:
            response: Response of the game page request (not read here).
        """
        logger.info("🎲 开始2048游戏模拟")

        # Loading / preparation pause before playing.
        warmup_seconds = self.user_db.simulate_human_delays("thinking")
        logger.info(f"🤔 游戏加载和准备: {warmup_seconds:.1f}秒")
        time.sleep(warmup_seconds)

        # Total time to spend playing.
        stay_range = self.config['settings']['game_page_stay_time']
        play_seconds = self.user_db.get_realistic_timing(
            random.uniform(*stay_range),
            self.current_behavior
        )
        logger.info(f"🎮 游戏总时长: {play_seconds:.1f}秒")

        # Play.
        self._simulate_game_sessions(play_seconds)

    def _simulate_game_sessions(self, total_time):
        """
        Split the game time into 2-4 sessions with a break between them.

        Each session lasts an equal share of ``total_time`` scaled by a
        random factor in [0.8, 1.2]; breaks last 3-10 seconds.

        Args:
            total_time: Total game time in seconds.
        """
        sessions = random.randint(2, 4)

        for number in range(1, sessions + 1):
            session_time = total_time / sessions * random.uniform(0.8, 1.2)
            logger.info(f"🎯 游戏会话 {number}/{sessions}, 时长: {session_time:.1f}秒")

            self._simulate_single_game_session(session_time)

            if number == sessions:
                # No break after the last session.
                break

            break_time = random.uniform(3, 10)
            logger.info(f"⏸️ 休息思考: {break_time:.1f}秒")
            time.sleep(break_time)

    def _simulate_single_game_session(self, session_time):
        """
        Log random game moves, with pauses, until ``session_time`` has elapsed.

        Every 8th move is followed by a long "strategy" pause; otherwise there
        is a 40% chance of a short pause and a quick move the rest of the
        time. After any move there is a 6% chance of a logged mis-move plus
        correction.

        Elapsed time is measured with time.monotonic() so that system clock
        adjustments cannot shorten or lengthen the session. The loop checks
        the deadline only between moves, so a session may overrun by the
        length of its last pause.

        Args:
            session_time: Session length in seconds; zero or negative means
                no moves are made.
        """
        game_moves = ["⬆️上", "⬇️下", "⬅️左", "➡️右"]

        deadline = time.monotonic() + session_time
        move_count = 0

        while time.monotonic() < deadline:
            move = random.choice(game_moves)
            move_count += 1

            logger.info(f"  🎮 第{move_count}步: {move}")

            # Thinking time varies with the "difficulty" of the move.
            if move_count % 8 == 0:  # long think every 8 moves
                think_time = random.uniform(4, 10)
                logger.info(f"    🧠 策略思考: {think_time:.1f}秒")
                time.sleep(think_time)
            elif random.random() < 0.4:  # 40% chance of a short think
                think_time = random.uniform(0.8, 3)
                time.sleep(think_time)
            else:  # quick move
                time.sleep(random.uniform(0.4, 1.2))

            # Occasional mis-move followed by a correction (6% chance).
            if random.random() < 0.06:
                logger.info("    ❌ 误操作，立即纠正")
                time.sleep(0.3)
                corrective_move = random.choice(game_moves)
                logger.info(f"  🔄 纠正: {corrective_move}")
                time.sleep(random.uniform(0.5, 1.0))

    def run_single_visit(self):
        """
        Run one complete visit: session setup, source page, main site, game page.

        A failed source-page simulation is only logged as a warning; a failed
        main-site or game-page step aborts the visit. The session is closed
        in all cases once setup has completed.

        Returns:
            True when both target pages were visited, otherwise False.
        """
        logger.info("🚀 开始执行真实访问流程")

        # Set up the session (outside the try: its errors propagate).
        self.setup_session()

        try:
            # 1. Source page (non-fatal).
            if not self.simulate_realistic_source_visit():
                logger.warning("⚠️ 来源访问模拟失败，继续执行")

            # 2. Main site, 3. game page (both mandatory, in this order).
            mandatory_steps = (
                (self.visit_main_site_realistic, "❌ 主网站访问失败"),
                (self.visit_game_page_realistic, "❌ 游戏页面访问失败"),
            )
            for step, failure_message in mandatory_steps:
                if not step():
                    logger.error(failure_message)
                    return False

            logger.info("✅ 访问流程完美执行成功！")
            return True
        except Exception as e:
            logger.error(f"❌ 访问流程执行出错: {e}")
            return False
        finally:
            if self.session:
                self.session.close()

    def run_continuous(self, total_visits=None, delay_range=None):
        """
        Run several visits in a row with a randomized delay between them.

        Args:
            total_visits: Number of visits; defaults to the configured
                'default_visits'. Zero or a negative number runs no visit.
            delay_range: (min, max) base delay in seconds between visits;
                defaults to the configured 'min_delay' / 'max_delay'.

        Returns:
            The number of successful visits.

        The base delay is scaled by the current visit-pattern type reported
        by the user database: 0.7x for "工作时间", 1.8x for "深夜", unchanged
        otherwise.
        """
        if total_visits is None:
            total_visits = self.config['settings']['default_visits']

        if delay_range is None:
            delay_range = (
                self.config['settings']['min_delay'],
                self.config['settings']['max_delay']
            )

        success_count = 0

        logger.info(f"🎯 开始连续访问，目标: {total_visits} 次")

        for i in range(total_visits):
            logger.info(f"{'='*60}")
            logger.info(f"🔄 执行第 {i+1}/{total_visits} 次访问")
            logger.info(f"{'='*60}")

            if self.run_single_visit():
                success_count += 1
                logger.info(f"✅ 第 {i+1} 次访问成功！累计成功: {success_count}")
            else:
                logger.error(f"❌ 第 {i+1} 次访问失败！")

            # Delay before the next visit (none after the last one).
            if i < total_visits - 1:
                base_delay = random.uniform(delay_range[0], delay_range[1])
                behavior = self.user_db.get_visit_behavior()

                # Scale the delay according to the visit pattern.
                if behavior['pattern_type'] == "工作时间":
                    delay = base_delay * 0.7
                elif behavior['pattern_type'] == "深夜":
                    delay = base_delay * 1.8
                else:
                    delay = base_delay

                logger.info(f"⏳ 智能等待 {delay:.1f} 秒 (当前时段: {behavior['pattern_type']})")
                time.sleep(delay)

        # Guard against ZeroDivisionError when no visit was requested.
        if total_visits > 0:
            success_rate = (success_count / total_visits) * 100
        else:
            success_rate = 0.0
        logger.info("🎉 访问任务完成！")
        logger.info(f"📊 成功率: {success_count}/{total_visits} ({success_rate:.1f}%)")
        return success_count

def main():
    """
    Interactive entry point: load config.json, show a menu and run a mode.

    Modes: 1 = single visit, 2 = continuous run with configured parameters,
    3 = continuous run with a visit count and delay range typed by the user.

    In mode 3 the visit count must be at least 1 and both delays must be
    non-negative; anything else is reported as an input error before any
    visit starts. Only the parsing of the typed values is treated as an
    input error, so a ValueError raised during the run itself is no longer
    misreported as bad input.
    """
    config_file = 'config.json'

    if not os.path.exists(config_file):
        print(f"❌ 配置文件 {config_file} 不存在！")
        return

    def format_rate(success, total):
        """Render 'success/total (xx.x%)' without dividing by zero."""
        percent = (success / total) * 100 if total > 0 else 0.0
        return f"{success}/{total} ({percent:.1f}%)"

    try:
        bot = WebTrafficBotFinal(config_file)

        print("=" * 60)
        print("🎭 网站流量模拟脚本 (最终升级版)")
        print("=" * 60)
        print("🌟 特性：真实用户行为 + Google/GitHub来源 + 完整浏览器指纹")
        print("⚠️  请确保仅用于测试自己的网站！")
        print()
        print(f"🎯 目标网站: {bot.config['targets']['main_site']}")
        print(f"🎮 游戏页面: {bot.config['targets']['game_page']}")
        print()

        print("请选择运行模式:")
        print("1. 💎 单次完整访问测试")
        print("2. 🚀 连续访问模式 (使用配置参数)")
        print("3. ⚙️ 自定义连续访问")

        choice = input("请输入选择 (1/2/3): ").strip()

        if choice == "1":
            logger.info("🎬 开始单次完整访问测试")
            success = bot.run_single_visit()
            if success:
                print("🎉 单次访问测试完美成功！")
            else:
                print("😞 单次访问测试失败！")

        elif choice == "2":
            logger.info("🎬 开始连续访问模式")
            success_count = bot.run_continuous()
            total = bot.config['settings']['default_visits']
            print(f"🎉 连续访问完成！成功率: {format_rate(success_count, total)}")

        elif choice == "3":
            # Keep the try body minimal: only the parsing can be "bad input".
            try:
                visit_count = int(input("请输入访问次数: ").strip())
                min_delay = int(input("请输入最小延迟秒数: ").strip())
                max_delay = int(input("请输入最大延迟秒数: ").strip())
            except ValueError:
                print("❌ 输入参数错误！")
                return

            # A zero count would divide by zero below; negative delays would
            # make time.sleep() fail after the first visit.
            if visit_count < 1 or min_delay < 0 or max_delay < 0:
                print("❌ 输入参数错误！")
                return

            logger.info(f"🎬 开始自定义连续访问，总次数: {visit_count}")
            success_count = bot.run_continuous(
                total_visits=visit_count,
                delay_range=(min_delay, max_delay)
            )

            print(f"🎉 自定义访问完成！成功率: {format_rate(success_count, visit_count)}")
        else:
            print("❌ 无效选择！")

    except KeyboardInterrupt:
        print("\n⚠️ 用户中断执行")
    except Exception as e:
        logger.error(f"程序执行出错: {e}")
        print("❌ 程序执行出错，请检查日志文件 traffic_bot_final.log")

if __name__ == "__main__":
    main()