Files
shualiangv1/website_traffic_bot_realistic.py
huangzhenpc 9f9f44ecc7 正式2
2025-07-18 10:08:38 +08:00

572 lines
21 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
网站流量模拟脚本 (真实用户行为版本)
使用真实用户数据库模拟更真实的访问轨迹
"""
import requests
import time
import random
import json
import os
import logging
from urllib.parse import urlparse, urljoin
import re
from real_user_database import RealUserDatabase
# Logging setup: INFO-level records go both to a UTF-8 log file and to the
# console, using a "timestamp - level - message" layout.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
_log_handlers = [
    logging.FileHandler('traffic_bot_realistic.log', encoding='utf-8'),
    logging.StreamHandler(),
]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_log_handlers)

# Module-level logger shared by everything in this file.
logger = logging.getLogger(__name__)
class WebTrafficBotRealistic:
    """Scripted visitor that walks a fixed three-step flow against configured targets.

    One visit consists of: (1) a simulated stay on a GitHub page (no request is
    sent to GitHub; only the ``Referer`` header is set afterwards), (2) a GET of
    ``config['targets']['main_site']`` and (3) a GET of
    ``config['targets']['game_page']``. Pauses between the steps are taken from
    the ``RealUserDatabase`` helper and from ``random``.

    Configuration keys read by this class:
        proxy (optional): ``username``, ``password``, ``host``, ``port``.
        targets: ``main_site``, ``game_page``.
        settings: ``main_site_stay_time``, ``game_page_stay_time`` (each a
            two-item range), ``default_visits``, ``min_delay``, ``max_delay``.
    """

    def __init__(self, config_file='config.json'):
        """Load the configuration and initialise per-visit state.

        Args:
            config_file: Path of the JSON configuration file.

        Raises:
            FileNotFoundError: If ``config_file`` does not exist.
            json.JSONDecodeError: If ``config_file`` is not valid JSON.
        """
        self.config = self.load_config(config_file)
        self.session = None
        self.user_db = RealUserDatabase()
        self.current_profile = None
        self.current_behavior = None
        # Candidate GitHub pages; one is picked per visit and used as the
        # Referer header of the following requests.
        self.github_referrers = [
            "https://github.com/trending",
            "https://github.com/trending/javascript",
            "https://github.com/trending/typescript",
            "https://github.com/topics/javascript",
            "https://github.com/topics/game",
            "https://github.com/topics/html5",
            "https://github.com/topics/2048",
            "https://github.com/search?q=2048+game",
            "https://github.com/search?q=html5+games",
            "https://github.com/collections/javascript-game-engines",
            "https://github.com/explore",
            "https://github.com/",
        ]

    def load_config(self, config_file):
        """Read and parse the JSON configuration file.

        Args:
            config_file: Path of the JSON configuration file.

        Returns:
            The parsed configuration object.

        Raises:
            FileNotFoundError: If the file does not exist (logged, then re-raised).
            json.JSONDecodeError: If the file is not valid JSON (logged, then
                re-raised).
        """
        try:
            with open(config_file, 'r', encoding='utf-8') as f:
                config = json.load(f)
            logger.info(f"配置文件加载成功: {config_file}")
            return config
        except FileNotFoundError:
            logger.error(f"配置文件未找到: {config_file}")
            raise
        except json.JSONDecodeError as e:
            logger.error(f"配置文件格式错误: {e}")
            raise

    def setup_session(self):
        """Create a fresh ``requests`` session for one visit.

        Picks a new profile and behaviour from ``self.user_db``, applies the
        optional proxy from the configuration, installs the headers generated
        for the profile, logs the chosen profile and prints the outgoing IP
        address when it can be determined.
        """
        # Release a session left over from an earlier call before replacing it.
        if self.session is not None:
            self.session.close()
        self.session = requests.Session()

        self.current_profile = self.user_db.get_random_user_profile()
        self.current_behavior = self.user_db.get_visit_behavior()

        proxy_config = self.config.get('proxy')
        if proxy_config:
            proxy_url = f"http://{proxy_config['username']}:{proxy_config['password']}@{proxy_config['host']}:{proxy_config['port']}"
            self.session.proxies = {
                'http': proxy_url,
                'https': proxy_url
            }
            # Only host and port are logged; credentials stay out of the log.
            logger.info(f"已配置代理: {proxy_config['host']}:{proxy_config['port']}")

        realistic_headers = self.user_db.get_realistic_headers(self.current_profile)
        self.session.headers.update(realistic_headers)

        logger.info(f"用户配置:")
        logger.info(f" 操作系统: {self.current_profile['operating_system']}")
        logger.info(f" 屏幕分辨率: {self.current_profile['screen_resolution']}")
        logger.info(f" 语言: {self.current_profile['language']}")
        logger.info(f" 时区: {self.current_profile['timezone']}")
        logger.info(f" 访问模式: {self.current_behavior['pattern_type']}")

        current_ip = self.get_current_ip()
        if current_ip:
            print(f"🌍 当前IP地址: {current_ip}")
        print(f"👤 用户配置: {self.current_profile['operating_system']} | {self.current_profile['screen_resolution']} | {self.current_behavior['pattern_type']}")

    @staticmethod
    def _extract_ip(response):
        """Pull the IP value out of an IP-echo service response.

        Looks for an ``origin`` or ``ip`` key in the JSON body; falls back to
        the stringified JSON value, or to the stripped response text when the
        body is not JSON.
        """
        try:
            data = response.json()
        except ValueError:
            # Body is not JSON: treat it as a plain-text answer.
            return response.text.strip()
        if isinstance(data, dict):
            for key in ('origin', 'ip'):
                if key in data:
                    return data[key]
        return str(data)

    def get_current_ip(self):
        """Ask public IP-echo services for the address the session egresses from.

        The services are tried in order through ``self.session`` (and therefore
        through its proxy, if any); the first one answering with HTTP 200 wins.

        Returns:
            The reported IP value, or ``None`` when every service failed.
        """
        ip_services = [
            'https://httpbin.org/ip',
            'https://api.ipify.org?format=json',
            'https://ipinfo.io/json',
        ]
        for service in ip_services:
            try:
                logger.info(f"正在获取IP地址: {service}")
                response = self.session.get(service, timeout=10)
                if response.status_code == 200:
                    ip = self._extract_ip(response)
                    logger.info(f"✅ 当前IP地址: {ip}")
                    return ip
            except Exception as e:
                # Best-effort lookup: log the failure and try the next service.
                logger.warning(f"{service} 获取IP失败: {e}")
                continue
        logger.error("❌ 无法获取当前IP地址")
        return None

    def simulate_realistic_github_visit(self):
        """Simulate a stay on a GitHub page and set it as the session Referer.

        No HTTP request is made here: the method only sleeps for a computed
        duration and then sets the ``Referer`` and ``Sec-Fetch-Site`` headers
        used by the following requests.

        Returns:
            ``True`` on success, ``False`` if any step raised.
        """
        github_page = random.choice(self.github_referrers)
        try:
            logger.info(f"🔍 模拟从GitHub访问: {github_page}")
            github_stay_time = self.user_db.get_realistic_timing(
                random.uniform(5, 15),
                self.current_behavior
            )
            logger.info(f"📚 在GitHub页面停留 {github_stay_time:.1f}")
            self._simulate_github_browsing(github_stay_time)
            self.session.headers.update({
                'Referer': github_page,
                'Sec-Fetch-Site': 'cross-site'
            })
            return True
        except Exception as e:
            logger.error(f"GitHub访问模拟失败: {e}")
            return False

    def _simulate_github_browsing(self, total_time):
        """Sleep through 3-6 logged pseudo-actions.

        Args:
            total_time: Time budget in seconds; it is divided evenly over the
                segments and each action sleeps a fraction (0.2-0.4) of its
                segment, plus a helper-provided delay between segments.
        """
        actions = [
            "查看项目描述",
            "阅读README",
            "查看代码文件",
            "检查更新时间",
            "查看星标数量",
            "浏览提交历史"
        ]
        segments = random.randint(3, 6)
        segment_time = total_time / segments
        for i in range(segments):
            if i > 0:
                delay = self.user_db.simulate_human_delays("reading")
                time.sleep(delay)
            action = random.choice(actions)
            logger.info(f" GitHub行为: {action}")
            # The sleep fraction depends on the verb in the action label.
            if "阅读" in action:
                time.sleep(segment_time * 0.4)
            elif "查看" in action:
                time.sleep(segment_time * 0.3)
            else:
                time.sleep(segment_time * 0.2)

    def visit_main_site_realistic(self):
        """Fetch the configured main site and run the page-browsing simulation.

        Returns:
            ``True`` if the page was fetched and the simulation completed,
            ``False`` otherwise.
        """
        main_site = self.config['targets']['main_site']
        try:
            logger.info(f"🏠 访问主网站: {main_site}")
            response = self.make_realistic_request(main_site)
            if not response:
                return False
            self._simulate_realistic_browsing(response, is_main_page=True)
            return True
        except Exception as e:
            logger.error(f"主网站访问失败: {e}")
            return False

    def visit_game_page_realistic(self):
        """Fetch the configured game page (Referer = main site) and simulate play.

        Returns:
            ``True`` if the page was fetched and the simulation completed,
            ``False`` otherwise.
        """
        game_page = self.config['targets']['game_page']
        main_site = self.config['targets']['main_site']
        try:
            logger.info(f"🎮 访问游戏页面: {game_page}")
            self.session.headers.update({
                'Referer': main_site,
                'Sec-Fetch-Site': 'same-origin'
            })
            response = self.make_realistic_request(game_page)
            if not response:
                return False
            self._simulate_realistic_gaming(response)
            return True
        except Exception as e:
            logger.error(f"游戏页面访问失败: {e}")
            return False

    def make_realistic_request(self, url, timeout=10):
        """GET ``url`` through the current session after a helper-provided delay.

        Args:
            url: Address to fetch.
            timeout: Request timeout in seconds.

        Returns:
            The response object, or ``None`` if the request failed or the
            server answered with an error status.
        """
        try:
            pre_request_delay = self.user_db.simulate_human_delays("thinking")
            time.sleep(pre_request_delay)
            response = self.session.get(url, timeout=timeout, allow_redirects=True)
            logger.info(f"📡 访问 {url}")
            logger.info(f" 状态码: {response.status_code}")
            logger.info(f" 响应大小: {len(response.content)} 字节")
            logger.info(f" 响应时间: {response.elapsed.total_seconds():.2f}")
            if response.headers.get('content-type'):
                logger.info(f" 内容类型: {response.headers.get('content-type')}")
            # Details are logged first so that error responses are still recorded.
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException as e:
            logger.error(f"请求失败 {url}: {e}")
            return None

    def _simulate_realistic_browsing(self, response, is_main_page=False):
        """Derive a stay time for the fetched page and sleep through it in segments.

        Args:
            response: Response whose ``text`` is the page markup.
            is_main_page: When true the base stay time is drawn from
                ``settings.main_site_stay_time``; otherwise the estimated
                reading time is used.
        """
        content = response.text
        # Rough text length: markup with tags removed.
        text_length = len(re.sub(r'<[^>]+>', '', content))
        # Scaled by 60 to obtain seconds.
        reading_time = text_length / self.current_behavior['reading_speed'] * 60
        logger.info(f"📖 页面内容长度: {text_length} 字符")
        logger.info(f"📖 预估阅读时间: {reading_time:.1f}")
        if is_main_page:
            base_time = random.uniform(*self.config['settings']['main_site_stay_time'])
        else:
            base_time = reading_time
        stay_time = self.user_db.get_realistic_timing(base_time, self.current_behavior)
        logger.info(f"⏱️ 实际停留时间: {stay_time:.1f}")
        self._simulate_browsing_segments(stay_time, content)

    @staticmethod
    def _is_internal_link(link, main_site):
        """Return True when ``link`` resolves to an http(s) URL on ``main_site``'s host.

        Relative links are resolved against ``main_site``. Links with other
        schemes (``mailto:``, ``javascript:`` ...) and links to other hosts,
        including protocol-relative ones, are not internal.
        """
        try:
            target = urlparse(urljoin(main_site, link))
            site = urlparse(main_site)
        except ValueError:
            # Malformed URL (e.g. a broken IPv6 literal): not usable as a link.
            return False
        if target.scheme not in ('http', 'https'):
            return False
        return target.netloc.lower() == site.netloc.lower()

    def _simulate_browsing_segments(self, total_time, content):
        """Split the stay into 3-8 logged segments and sleep through each.

        Args:
            total_time: Total stay time in seconds, divided evenly over segments.
            content: Page markup; ``href`` values are extracted from it for the
                link-inspection action (links are only logged, never fetched).
        """
        links = re.findall(r'href=["\'](.*?)["\']', content)
        main_site = self.config['targets']['main_site']
        internal_links = [
            link for link in links if self._is_internal_link(link, main_site)
        ]
        segments = random.randint(3, 8)
        segment_time = total_time / segments
        browsing_actions = [
            "阅读页面内容",
            "查看导航菜单",
            "滚动浏览",
            "查看页脚信息",
            "检查页面链接",
            "观察页面布局"
        ]
        for i in range(segments):
            action = random.choice(browsing_actions)
            logger.info(f" 浏览行为: {action}")
            if action == "滚动浏览":
                self._simulate_scrolling_behavior(segment_time)
            elif action == "检查页面链接" and internal_links:
                self._simulate_link_hovering(internal_links)
                time.sleep(segment_time * 0.8)
            else:
                # Jitter the segment length by +/-30%.
                actual_segment_time = segment_time * random.uniform(0.7, 1.3)
                time.sleep(actual_segment_time)

    def _simulate_scrolling_behavior(self, duration):
        """Sleep through 2-5 scroll bursts, each optionally followed by a pause.

        Args:
            duration: Time budget in seconds passed by the caller. NOTE: the
                individual sleeps are drawn from fixed ranges, so this value
                does not currently bound the total time spent here.
        """
        scroll_sessions = random.randint(2, 5)
        for session in range(scroll_sessions):
            logger.info(f" 滚动会话 {session + 1}")
            quick_scrolls = random.randint(2, 4)
            for _ in range(quick_scrolls):
                time.sleep(random.uniform(0.2, 0.8))
            # 70% chance of a longer pause after the burst.
            if random.random() < 0.7:
                pause_time = random.uniform(1, 4)
                logger.info(f" 停顿阅读 {pause_time:.1f}")
                time.sleep(pause_time)

    def _simulate_link_hovering(self, links):
        """Log up to three randomly sampled links, sleeping briefly after each.

        Args:
            links: Candidate link strings; only the first 50 characters of a
                link are logged.
        """
        hover_count = min(random.randint(1, 3), len(links))
        sample_links = random.sample(links, hover_count)
        for link in sample_links:
            logger.info(f" 查看链接: {link[:50]}...")
            time.sleep(random.uniform(0.5, 2))

    def _simulate_realistic_gaming(self, response):
        """Sleep through a simulated play period on the game page.

        Args:
            response: Response of the game page request (not inspected).
        """
        logger.info("🎲 开始模拟2048游戏")
        prep_time = self.user_db.simulate_human_delays("thinking")
        logger.info(f"🤔 游戏准备时间: {prep_time:.1f}")
        time.sleep(prep_time)
        base_time = random.uniform(*self.config['settings']['game_page_stay_time'])
        game_time = self.user_db.get_realistic_timing(base_time, self.current_behavior)
        logger.info(f"🎮 游戏总时长: {game_time:.1f}")
        self._simulate_game_sessions(game_time)

    def _simulate_game_sessions(self, total_time):
        """Split the play time into 2-5 sessions with short breaks in between.

        Args:
            total_time: Overall play time in seconds; each session receives an
                equal share scaled by a random factor in [0.8, 1.2].
        """
        sessions = random.randint(2, 5)
        for session in range(sessions):
            session_time = total_time / sessions * random.uniform(0.8, 1.2)
            logger.info(f"🎯 游戏会话 {session + 1}, 时长: {session_time:.1f}")
            self._simulate_single_game_session(session_time)
            # No break after the final session.
            if session < sessions - 1:
                break_time = random.uniform(2, 8)
                logger.info(f"⏸️ 会话间休息: {break_time:.1f}")
                time.sleep(break_time)

    def _simulate_single_game_session(self, session_time):
        """Log random moves with pauses until ``session_time`` seconds have elapsed.

        Args:
            session_time: Wall-clock length of the session in seconds. The
                check happens at the top of each iteration, so the last
                iteration may overrun it.
        """
        game_moves = ["⬆️上移", "⬇️下移", "⬅️左移", "➡️右移"]
        start_time = time.time()
        move_count = 0
        while time.time() - start_time < session_time:
            move = random.choice(game_moves)
            move_count += 1
            logger.info(f"{move_count}步: {move}")
            if move_count % 10 == 0:
                # Every tenth move gets a long pause.
                think_time = random.uniform(3, 8)
                logger.info(f" 深度思考: {think_time:.1f}")
                time.sleep(think_time)
            elif random.random() < 0.3:
                # 30% of the remaining moves get a short pause.
                think_time = random.uniform(0.5, 2)
                time.sleep(think_time)
            else:
                time.sleep(random.uniform(0.3, 1))
            # 5% chance of a logged "mistake" followed by a corrective move.
            if random.random() < 0.05:
                logger.info(" 误操作,快速纠正")
                time.sleep(0.2)
                logger.info(f" 纠正: {random.choice(game_moves)}")
                time.sleep(random.uniform(0.3, 0.8))

    def run_single_visit(self):
        """Run one complete visit: session setup, GitHub step, main site, game page.

        A failure of the GitHub step is only logged; a failure of either page
        visit, or of the session setup, ends the visit.

        Returns:
            ``True`` when both page visits succeeded, ``False`` otherwise.
        """
        logger.info("🚀 开始执行真实用户访问流程")
        try:
            # Inside the try block so that a setup failure is reported as a
            # failed visit and the session is still closed below.
            self.setup_session()
            if not self.simulate_realistic_github_visit():
                logger.warning("GitHub访问模拟失败继续执行")
            if not self.visit_main_site_realistic():
                logger.error("主网站访问失败")
                return False
            if not self.visit_game_page_realistic():
                logger.error("游戏页面访问失败")
                return False
            logger.info("✅ 真实访问流程执行成功")
            return True
        except Exception as e:
            logger.error(f"访问流程执行出错: {e}")
            return False
        finally:
            if self.session is not None:
                self.session.close()

    def run_continuous(self, total_visits=None, delay_range=None):
        """Run several visits in a row with a randomised pause between them.

        Args:
            total_visits: Number of visits; defaults to
                ``settings.default_visits``.
            delay_range: ``(min, max)`` pause in seconds; defaults to
                ``(settings.min_delay, settings.max_delay)``.

        Returns:
            The number of visits for which ``run_single_visit`` returned true.
        """
        if total_visits is None:
            total_visits = self.config['settings']['default_visits']
        if delay_range is None:
            delay_range = (
                self.config['settings']['min_delay'],
                self.config['settings']['max_delay']
            )
        success_count = 0
        for i in range(total_visits):
            logger.info(f"🔄 执行第 {i+1}/{total_visits} 次真实访问")
            if self.run_single_visit():
                success_count += 1
            # No pause after the final visit.
            if i < total_visits - 1:
                base_delay = random.uniform(delay_range[0], delay_range[1])
                behavior = self.user_db.get_visit_behavior()
                # Scale the pause by the pattern type reported by the helper.
                if behavior['pattern_type'] == "工作时间":
                    delay = base_delay * 0.8
                elif behavior['pattern_type'] == "深夜":
                    delay = base_delay * 1.5
                else:
                    delay = base_delay
                logger.info(f"⏳ 智能延迟 {delay:.1f} 秒 (模式: {behavior['pattern_type']})")
                time.sleep(delay)
        logger.info(f"🎉 真实访问完成,成功: {success_count}/{total_visits}")
        return success_count
def main():
    """Interactive entry point.

    Checks that ``config.json`` exists, builds the bot, prints a banner (which
    tells the operator to use the tool only against their own site) and asks
    for one of three modes: a single visit, a continuous run with the
    configured settings, or a continuous run with parameters typed in by the
    operator.
    """
    config_file = 'config.json'
    if not os.path.exists(config_file):
        print(f"❌ 配置文件 {config_file} 不存在!")
        return
    try:
        bot = WebTrafficBotRealistic(config_file)
        print("=== 网站流量模拟脚本 (真实用户行为版) ===")
        print("🎭 使用真实用户数据库,模拟最真实的访问轨迹")
        print("⚠️ 请确保仅用于测试自己的网站!")
        print("目标网站:", bot.config['targets']['main_site'])
        print("游戏页面:", bot.config['targets']['game_page'])
        print()
        print("请选择运行模式:")
        print("1. 单次真实访问测试")
        print("2. 连续真实访问模式 (使用配置文件设置)")
        print("3. 连续真实访问模式 (自定义参数)")
        choice = input("请输入选择 (1/2/3): ").strip()
        if choice == "1":
            logger.info("开始单次真实访问测试")
            success = bot.run_single_visit()
            if success:
                print("✅ 单次真实访问测试成功!")
            else:
                print("❌ 单次真实访问测试失败!")
        elif choice == "2":
            logger.info("开始连续真实访问(配置文件模式)")
            success_count = bot.run_continuous()
            print(f"✅ 连续真实访问完成!成功: {success_count}/{bot.config['settings']['default_visits']}")
        elif choice == "3":
            # Only the parsing is guarded, so a ValueError raised later during
            # the run is not misreported as bad input.
            try:
                visit_count = int(input("请输入访问次数: ").strip())
                min_delay = int(input("请输入最小延迟秒数: ").strip())
                max_delay = int(input("请输入最大延迟秒数: ").strip())
            except ValueError:
                print("❌ 输入参数错误!")
                return
            # Negative delays would make time.sleep fail after a visit has
            # already run; reject them up front.
            if min_delay < 0 or max_delay < 0:
                print("❌ 输入参数错误!")
                return
            logger.info(f"开始连续真实访问,总次数: {visit_count}")
            success_count = bot.run_continuous(
                total_visits=visit_count,
                delay_range=(min_delay, max_delay)
            )
            print(f"✅ 连续真实访问完成!成功: {success_count}/{visit_count}")
        else:
            print("❌ 无效选择!")
    except KeyboardInterrupt:
        print("\n⚠️ 用户中断执行")
    except Exception as e:
        # logger.exception records the traceback in the log file as well.
        logger.exception(f"程序执行出错: {e}")
        print("❌ 程序执行出错,请检查日志文件 traffic_bot_realistic.log")


if __name__ == "__main__":
    main()