Files
emailsystem/decode_email.py

265 lines
9.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
邮件解码工具
用于解析.eml文件并显示可读的邮件内容
"""
import base64
import sys
import re
import email
from email import policy
import os
import argparse
import html
# Patterns tried in order against the HTML body; the first one that matches wins.
_HTML_CODE_PATTERNS = (
    r'letter-spacing:\s*\d+px[^>]*>([^<]+)<',   # codes are usually styled with wide letter spacing
    r'<div[^>]*>(\d{4,8})</div>',               # bare digits inside a <div>
    r'验证码[:\uFF1A]\s*([A-Z0-9]{4,8})',        # Chinese label followed by an ASCII or full-width colon
    r'code[^\d]+(\d{4,8})',                     # English label
    # Broad last-resort fallback: any 6-character upper-case/digit token.
    # NOTE: on raw HTML this can also hit unrelated tokens such as hex colours.
    r'\b([A-Z0-9]{6})\b',
)

# Patterns tried in order against the plain-text body.
_TEXT_CODE_PATTERNS = (
    r'验证码[:\uFF1A]\s*([A-Z0-9]{4,8})',        # Chinese label followed by an ASCII or full-width colon
    r'code[^\d]+(\d{4,8})',                     # English label
    r'\b(\d{6})\b',                             # any standalone 6-digit number
)


def _decode_part(part, error_label):
    """Return the content of one non-multipart MIME part as ``str``.

    ``get_content()`` is tried first. If it raises, the error is reported
    under *error_label* and the raw decoded payload is used instead. Bytes
    are decoded with the part's declared charset, falling back to UTF-8 when
    that charset is missing or unknown. Anything that is neither ``str`` nor
    ``bytes`` yields an empty string.
    """
    try:
        content = part.get_content()
    except Exception as e:  # best effort: any parsing failure falls back to the raw payload
        print(f"{error_label}: {str(e)}")
        content = part.get_payload(decode=True)

    if isinstance(content, bytes):
        charset = part.get_content_charset() or 'utf-8'
        try:
            return content.decode(charset, errors='replace')
        except (LookupError, ValueError):
            # Unknown or invalid charset name declared by the sender.
            return content.decode('utf-8', errors='replace')
    return content if isinstance(content, str) else ""


def _extract_bodies(msg):
    """Return ``(body_text, body_html)`` for *msg*; either may be empty."""
    body_text = ""
    body_html = ""

    if msg.is_multipart():
        # walk() descends into nested containers (e.g. multipart/mixed holding
        # multipart/alternative), which iterating only the direct children misses.
        for part in msg.walk():
            if part.is_multipart():
                continue
            if part.get_content_disposition() == 'attachment':
                continue
            content_type = part.get_content_type()
            # Keep the first part of each kind so later parts (footers,
            # inline text files) do not overwrite the real body.
            if content_type == "text/plain" and not body_text:
                body_text = _decode_part(part, "解析纯文本内容出错")
            elif content_type == "text/html" and not body_html:
                body_html = _decode_part(part, "解析HTML内容出错")
        return body_text, body_html

    content_type = msg.get_content_type()
    if content_type == "text/html":
        body_html = _decode_part(msg, "解析邮件内容出错")
    else:
        if content_type != "text/plain":
            # Unknown types are still shown, treated as plain text.
            print(f"未知内容类型: {content_type}")
        body_text = _decode_part(msg, "解析邮件内容出错")
    return body_text, body_html


def _first_code(patterns, text):
    """Return the stripped first capture of the first matching pattern, or None."""
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1).strip()
    return None


def _find_verification_link(body_html, body_text):
    """Return the first link mentioning verify/confirm/activate, or None."""
    if body_html:
        match = re.search(
            r'href=[\'"]([^\'"]*(?:verify|confirm|activate)[^\'"]*)[\'"]',
            body_html,
        )
        if match:
            # Attribute values are HTML-escaped ("&amp;"); undo that so the
            # printed URL is usable as-is.
            return html.unescape(match.group(1))
    if body_text:
        # \S* (not \S+) so a URL that ends right after the keyword still matches.
        match = re.search(r'https?://\S+?(?:verify|confirm|activate)\S*', body_text)
        if match:
            return match.group(0)
    return None


def decode_eml_file(filename):
    """Parse an .eml file and print its headers, body and any verification data.

    Prints the Subject/From/To/Date headers, the plain-text body, the first
    500 characters of the HTML body, and (when found) a verification code and
    a verification link extracted with heuristic regular expressions.

    Args:
        filename: Path of the .eml file to read.

    Returns:
        True if the file was read and parsed, False if it does not exist or
        could not be read.
    """
    print(f"解析邮件文件: {filename}")

    if not os.path.exists(filename):
        print(f"错误: 文件不存在 {filename}")
        return False

    try:
        # Parse from bytes: decoding the whole file as UTF-8 up front would
        # corrupt 8-bit bodies that use another charset.
        with open(filename, 'rb') as f:
            msg = email.message_from_binary_file(f, policy=policy.default)
    except Exception as e:
        print(f"读取文件错误: {str(e)}")
        return False

    print("\n===== 邮件头信息 =====")
    print(f"主题: {msg.get('Subject', '无主题')}")
    print(f"发件人: {msg.get('From', '未知')}")
    print(f"收件人: {msg.get('To', '未知')}")
    print(f"日期: {msg.get('Date', '未知')}")

    print("\n===== 邮件内容 =====")
    body_text, body_html = _extract_bodies(msg)

    if body_text:
        print("\n----- 纯文本内容 -----")
        print(body_text)

    if body_html:
        print("\n----- HTML内容摘要 -----")
        # HTML bodies tend to be long, so only a 500-character preview is shown.
        preview = (body_html[:500] + "...") if len(body_html) > 500 else body_html
        print(preview)

    # The HTML body is searched first; plain text is the fallback.
    verification_code = None
    if body_html:
        verification_code = _first_code(_HTML_CODE_PATTERNS, body_html)
    if not verification_code and body_text:
        verification_code = _first_code(_TEXT_CODE_PATTERNS, body_text)

    if verification_code:
        print("\n===== 提取结果 =====")
        print(f"验证码: {verification_code}")

    verification_link = _find_verification_link(body_html, body_text)
    if verification_link:
        print(f"验证链接: {verification_link}")

    return True
def decode_all_emails(directory, *, limit=5):
    """Decode the most recently modified .eml files found under *directory*.

    The directory is searched recursively. Files are ordered by modification
    time, newest first, and each one is passed to ``decode_eml_file``.

    Args:
        directory: Root directory to scan.
        limit: Maximum number of files to decode (newest first). ``None``
            decodes every file found. Defaults to 5.

    Raises:
        ValueError: If *limit* is negative.
    """
    if limit is not None and limit < 0:
        raise ValueError("limit must be non-negative or None")

    print(f"扫描目录: {directory}")

    email_files = []
    for root, _dirs, names in os.walk(directory):
        # Match the extension case-insensitively so "MAIL.EML" is found too.
        email_files.extend(
            os.path.join(root, name)
            for name in names
            if name.lower().endswith(".eml")
        )

    # Newest first.
    email_files.sort(key=os.path.getmtime, reverse=True)

    print(f"找到 {len(email_files)} 个邮件文件")

    if limit is not None and len(email_files) > limit:
        print(f"只显示最新的{limit}封邮件")
        email_files = email_files[:limit]

    for i, email_file in enumerate(email_files, 1):
        print(f"\n\n======= 邮件 {i}/{len(email_files)} =======")
        print(f"文件: {email_file}")
        decode_eml_file(email_file)
def main():
    """Command-line entry point.

    Behaviour depends on the arguments:
      * ``--all``: decode the emails under the ``email_data`` directory.
      * ``path`` is a file: decode that file.
      * ``path`` is a directory: decode the emails under it.
      * no arguments: decode the most recently modified .eml file under
        ``email_data``.

    Returns:
        0 on success, 1 on any failure (missing path, missing data
        directory, no .eml file found, or a file that could not be decoded).
    """
    cli = argparse.ArgumentParser(description='邮件解码工具')
    cli.add_argument('path', nargs='?', help='邮件文件路径或目录路径')
    cli.add_argument('--all', action='store_true', help='解码所有找到的邮件')
    options = cli.parse_args()

    def resolve_data_dir():
        """Return (candidate_path, exists) for the email_data directory."""
        candidate = 'email_data'
        if os.path.exists(candidate):
            return candidate, True
        # Second attempt: the same name joined onto the current working directory.
        candidate = os.path.join(os.getcwd(), 'email_data')
        return candidate, os.path.exists(candidate)

    # --all takes precedence over any positional path.
    if options.all:
        data_dir, found = resolve_data_dir()
        if not found:
            print(f"错误: 找不到邮件数据目录 {data_dir}")
            return 1
        decode_all_emails(data_dir)
        return 0

    # An explicit path may be either a single file or a directory.
    target = options.path
    if target:
        if os.path.isfile(target):
            decoded = decode_eml_file(target)
            return 0 if decoded else 1
        if os.path.isdir(target):
            decode_all_emails(target)
            return 0
        print(f"错误: 路径不存在 {target}")
        return 1

    # No arguments: fall back to the newest email in the data directory.
    data_dir, found = resolve_data_dir()
    if not found:
        print(f"错误: 找不到邮件数据目录 {data_dir}")
        print("请指定邮件文件路径: python decode_email.py <eml文件路径>")
        return 1

    newest_path = None
    newest_mtime = 0
    for folder, _subdirs, names in os.walk(data_dir):
        for name in names:
            if not name.endswith(".eml"):
                continue
            candidate = os.path.join(folder, name)
            stamp = os.path.getmtime(candidate)
            if stamp > newest_mtime:
                newest_mtime = stamp
                newest_path = candidate

    if not newest_path:
        print(f"找不到任何.eml文件在 {data_dir}")
        return 1

    print(f"解析最新的邮件文件: {newest_path}")
    decoded = decode_eml_file(newest_path)
    return 0 if decoded else 1
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())