Files
shualiangv1/user_agent_diagnostic.py
huangzhenpc 4db06ac517 正式22
2025-07-18 11:23:13 +08:00

164 lines
5.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
User-Agent诊断工具
检查代理是否修改了User-Agent头部
"""
import requests
import json
import time
import random
from real_user_database import RealUserDatabase
def _load_config(path):
    """Read the JSON configuration file at *path*.

    Returns the parsed configuration, or ``None`` (after printing an error
    message) when the file is missing or does not contain valid JSON.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print("❌ 配置文件config.json不存在")
    except json.JSONDecodeError as e:
        # A malformed config used to crash the tool with a traceback.
        print(f"❌ 配置文件config.json不是有效的JSON: {e}")
    return None


def _build_proxies(proxy_config):
    """Turn the ``proxy`` section of the config into a requests proxy mapping.

    Prints which mode is in use and returns the mapping, or ``None`` when no
    proxy section is configured. The section must provide the ``username``,
    ``password``, ``host`` and ``port`` keys (a missing key raises
    ``KeyError``, as before).
    """
    if not proxy_config:
        print("🏠 不使用代理")
        return None

    # NOTE(review): the credentials are interpolated verbatim, so characters
    # such as '@', ':' or '/' must already be percent-encoded in config.json.
    proxy_url = (
        f"http://{proxy_config['username']}:{proxy_config['password']}"
        f"@{proxy_config['host']}:{proxy_config['port']}"
    )
    print(f"🌐 使用代理: {proxy_config['host']}:{proxy_config['port']}")
    return {
        'http': proxy_url,
        'https': proxy_url,
    }


def _browser_headers(user_agent):
    """Return the browser-like header set sent with every probe request."""
    return {
        "User-Agent": user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        # NOTE(review): advertising "br" lets servers answer with Brotli;
        # presumably that can only be decoded when a brotli package is
        # installed -- confirm for the target environment.
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Cache-Control": "max-age=0",
        "DNT": "1",
    }


def _report_response(response, expected_user_agent):
    """Print what one echo service reported about the request.

    Compares the ``User-Agent`` echoed back in the JSON body (if any) with
    *expected_user_agent* and prints the reported IP address and a few
    echoed headers when the service includes them.
    """
    if response.status_code != 200:
        print(f"❌ 请求失败 ({response.status_code})")
        return

    print(f"✅ 请求成功 ({response.status_code})")
    try:
        data = response.json()
    except ValueError:
        # ValueError is the common base of json.JSONDecodeError and of the
        # decode errors raised by requests, whichever JSON backend is used.
        print("⚠️ 响应不是JSON格式")
        print(f"响应内容: {response.text[:200]}...")
        return

    # Only services that echo request headers can reveal the User-Agent.
    if 'headers' in data and 'User-Agent' in data['headers']:
        received_ua = data['headers']['User-Agent']
        print("📱 服务器收到的User-Agent:")
        print(f" {received_ua}")
        if received_ua == expected_user_agent:
            print("✅ User-Agent正确传递")
        else:
            print("❌ User-Agent被修改了")
            print(f" 期望: {expected_user_agent}")
            print(f" 实际: {received_ua}")

    # Services report the caller's address under either "ip" or "origin".
    if 'ip' in data:
        print(f"🌍 IP地址: {data['ip']}")
    elif 'origin' in data:
        print(f"🌍 IP地址: {data['origin']}")

    if 'headers' in data:
        for header in ('Accept', 'Accept-Language', 'Accept-Encoding'):
            if header in data['headers']:
                print(f"📋 {header}: {data['headers'][header]}")


def _print_advice():
    """Print the closing troubleshooting suggestions."""
    print("\n🎯 诊断建议:")
    print("如果User-Agent被修改可能的原因和解决方案")
    print("1. 代理服务器修改了User-Agent")
    print(" - 联系代理服务商")
    print(" - 尝试更换代理服务器")
    print("2. 代理服务商的反检测策略")
    print(" - 使用更高级的代理服务")
    print(" - 考虑使用住宅代理")
    print("3. 请求被中间件拦截")
    print(" - 检查网络环境")
    print(" - 尝试直连测试")


def test_user_agent_with_proxy():
    """Check whether the configured proxy alters the User-Agent header.

    Loads ``config.json`` from the current directory, generates a user
    profile via ``RealUserDatabase``, then requests several public echo
    services (through the proxy when a ``proxy`` section is configured) and
    prints, for each one, whether the User-Agent the server saw matches the
    one that was sent. Finishes by printing troubleshooting advice.

    Returns ``None``. If the configuration file is missing or is not valid
    JSON, an error message is printed and the function returns early.
    """
    print("🔍 User-Agent诊断工具")
    print("=" * 50)

    config = _load_config('config.json')
    if config is None:
        return

    # Generate a realistic user profile; only its User-Agent is used here.
    user_db = RealUserDatabase()
    profile = user_db.get_random_user_profile()
    real_user_agent = profile["user_agent"]
    print("📱 生成的真实User-Agent:")
    print(f" {real_user_agent}")
    print()

    test_services = [
        "https://httpbin.org/headers",
        "https://api.ipify.org?format=json",
        "https://ipinfo.io/json",
        "https://ifconfig.me/all.json",
    ]

    proxies = _build_proxies(config.get('proxy'))
    print()

    headers = _browser_headers(real_user_agent)
    last_index = len(test_services) - 1
    for i, service in enumerate(test_services):
        print(f"🔍 测试服务 {i+1}: {service}")
        # A fresh session per service, closed deterministically so pooled
        # connections are not leaked.
        with requests.Session() as session:
            if proxies:
                session.proxies = proxies
            session.headers.update(headers)
            try:
                response = session.get(service, timeout=15)
            except requests.exceptions.RequestException as e:
                print(f"❌ 请求异常: {e}")
            else:
                _report_response(response, real_user_agent)
        print("-" * 50)
        # Throttle between requests; no need to wait after the last one.
        if i < last_index:
            time.sleep(2)

    _print_advice()
def test_multiple_user_agents():
    """Print the User-Agent of five randomly generated user profiles.

    Each profile comes from ``RealUserDatabase.get_random_user_profile()``
    and is expected to expose ``"user_agent"`` and ``"operating_system"``
    entries; both are printed, followed by a blank line.
    """
    print("\n🔬 多User-Agent测试")
    print("=" * 50)

    database = RealUserDatabase()
    sample_count = 5
    for number in range(1, sample_count + 1):
        generated = database.get_random_user_profile()
        agent_string = generated["user_agent"]
        platform_name = generated["operating_system"]
        print(f"🎭 User-Agent {number} ({platform_name}):")
        print(f" {agent_string}")
        print()
if __name__ == "__main__":
    # Script entry point: run the proxy diagnostic first, then print the
    # sample User-Agents.
    for _diagnostic in (test_user_agent_with_proxy, test_multiple_user_agents):
        _diagnostic()