#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Email decoding tool.

Parses ``.eml`` files and prints their headers and body in readable form,
then tries to pull a verification code and a verification link out of the
body.
"""

import base64
import sys
import re
import email
from email import policy
import os
import argparse
import html

# Directory searched when no explicit path is given on the command line.
_DEFAULT_EMAIL_DIR = 'email_data'

# Maximum number of HTML characters echoed to the console.
_HTML_PREVIEW_CHARS = 500

# Candidate patterns for a verification code inside an HTML body, tried in
# order. Each pattern has exactly one capture group holding the code.
_HTML_CODE_PATTERNS = tuple(re.compile(p) for p in (
    r'letter-spacing:\s*\d+px[^>]*>([^<]+)<',   # code rendered with wide letter spacing
    r'<div[^>]*>\s*(\d{4,8})\s*</div>',         # digits alone inside a div
    r'验证码[:\uff1a]\s*([A-Z0-9]{4,8})',        # Chinese label, ASCII or full-width colon
    r'code[^\d]+(\d{4,8})',                     # English label
    r'\b([A-Z0-9]{6})\b',                       # any 6 upper-case letters/digits
))

# Candidate patterns for a verification code inside a plain-text body.
_TEXT_CODE_PATTERNS = tuple(re.compile(p) for p in (
    r'验证码[:\uff1a]\s*([A-Z0-9]{4,8})',        # Chinese label, ASCII or full-width colon
    r'code[^\d]+(\d{4,8})',                     # English label
    r'\b(\d{6})\b',                             # any 6-digit number
))

_HTML_LINK_PATTERN = re.compile(
    r'href=[\'"]([^\'"]*(?:verify|confirm|activate)[^\'"]*)[\'"]')
# \S* (not \S+) so that a URL ending exactly in the keyword still matches.
_TEXT_LINK_PATTERN = re.compile(r'https?://\S+?(?:verify|confirm|activate)\S*')


def _part_text(part, error_label):
    """Return the decoded text of a MIME part, or '' when it has none.

    ``get_content()`` is tried first; if it fails, the failure is reported
    with ``error_label`` and the raw payload is decoded with the declared
    charset (falling back to UTF-8 when that charset is unknown).
    """
    try:
        content = part.get_content()
    except (LookupError, UnicodeError, ValueError) as e:
        print(f"{error_label}: {str(e)}")
    else:
        # Non-text parts yield bytes or message objects; those are not a body.
        return content if isinstance(content, str) else ""

    payload = part.get_payload(decode=True)
    if not payload:
        return ""
    charset = part.get_content_charset() or 'utf-8'
    try:
        return payload.decode(charset, errors='replace')
    except LookupError:
        return payload.decode('utf-8', errors='replace')


def _iter_body_parts(msg):
    """Yield the non-attachment leaf parts of a multipart message, depth-first."""
    for part in msg.iter_parts():
        if part.is_attachment():
            continue
        if part.is_multipart() and part.get_content_maintype() == 'multipart':
            yield from _iter_body_parts(part)
        else:
            yield part


def _extract_bodies(msg):
    """Return ``(body_text, body_html)`` for a parsed message.

    Either element is '' when the message has no body of that kind. For
    multipart messages the first text/plain and first text/html body parts
    are used, including ones nested in inner multipart containers.
    """
    if not msg.is_multipart():
        content_type = msg.get_content_type()
        if content_type == "text/plain":
            return _part_text(msg, "解析邮件内容出错"), ""
        if content_type == "text/html":
            return "", _part_text(msg, "解析邮件内容出错")

        print(f"未知内容类型: {content_type}")
        # Best effort: keep the content only if it turns out to be text.
        try:
            content = msg.get_content()
        except (LookupError, UnicodeError, ValueError):
            content = None
        return (content if isinstance(content, str) else ""), ""

    body_text = ""
    body_html = ""
    for part in _iter_body_parts(msg):
        content_type = part.get_content_type()
        if content_type == "text/plain" and not body_text:
            body_text = _part_text(part, "解析纯文本内容出错")
        elif content_type == "text/html" and not body_html:
            body_html = _part_text(part, "解析HTML内容出错")
    return body_text, body_html


def _first_match(patterns, text):
    """Return the first non-empty, stripped capture among ``patterns``, or None."""
    for pattern in patterns:
        found = pattern.search(text)
        if found:
            candidate = found.group(1).strip()
            if candidate:
                return candidate
    return None


def _extract_verification_code(body_text, body_html):
    """Return a verification code found in the HTML or text body, or None."""
    code = _first_match(_HTML_CODE_PATTERNS, body_html) if body_html else None
    if not code and body_text:
        code = _first_match(_TEXT_CODE_PATTERNS, body_text)
    return code


def _extract_verification_link(body_text, body_html):
    """Return a verify/confirm/activate link from the bodies, or None."""
    if body_html:
        found = _HTML_LINK_PATTERN.search(body_html)
        if found:
            # Attribute values are HTML-escaped (e.g. "&amp;"); undo that so
            # the printed URL is usable as-is.
            return html.unescape(found.group(1))
    if body_text:
        found = _TEXT_LINK_PATTERN.search(body_text)
        if found:
            return found.group(0)
    return None


def decode_eml_file(filename):
    """Parse one ``.eml`` file and print its headers, body and extracted data.

    Args:
        filename: Path of the ``.eml`` file to parse.

    Returns:
        True if the file was read and parsed, False if it does not exist or
        could not be read.
    """
    print(f"解析邮件文件: {filename}")

    if not os.path.exists(filename):
        print(f"错误: 文件不存在 {filename}")
        return False

    # Parse from bytes so that each part is decoded with its own declared
    # charset instead of being forced through UTF-8 first.
    try:
        with open(filename, 'rb') as f:
            msg = email.message_from_binary_file(f, policy=policy.default)
    except OSError as e:
        print(f"读取文件错误: {str(e)}")
        return False

    print("\n===== 邮件头信息 =====")
    print(f"主题: {msg.get('Subject', '无主题')}")
    print(f"发件人: {msg.get('From', '未知')}")
    print(f"收件人: {msg.get('To', '未知')}")
    print(f"日期: {msg.get('Date', '未知')}")

    print("\n===== 邮件内容 =====")
    body_text, body_html = _extract_bodies(msg)

    if body_text:
        print("\n----- 纯文本内容 -----")
        print(body_text)

    if body_html:
        print("\n----- HTML内容摘要 -----")
        # HTML bodies are usually long; show only the beginning.
        if len(body_html) > _HTML_PREVIEW_CHARS:
            print(body_html[:_HTML_PREVIEW_CHARS] + "...")
        else:
            print(body_html)

    verification_code = _extract_verification_code(body_text, body_html)
    verification_link = _extract_verification_link(body_text, body_html)

    if verification_code or verification_link:
        print("\n===== 提取结果 =====")
    if verification_code:
        print(f"验证码: {verification_code}")
    if verification_link:
        print(f"验证链接: {verification_link}")

    return True


def _find_eml_files(directory):
    """Return the paths of all ``.eml`` files under ``directory`` (recursive)."""
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
        if name.lower().endswith(".eml")
    ]


def decode_all_emails(directory, limit=5):
    """Parse the most recently modified ``.eml`` files under ``directory``.

    Args:
        directory: Directory to scan recursively.
        limit: Maximum number of files to decode, newest first. ``None``
            decodes every file found.
    """
    print(f"扫描目录: {directory}")
    email_files = _find_eml_files(directory)

    # Newest first.
    email_files.sort(key=os.path.getmtime, reverse=True)

    print(f"找到 {len(email_files)} 个邮件文件")

    if limit is not None and len(email_files) > limit:
        print(f"只显示最新的{limit}封邮件")
        email_files = email_files[:limit]

    for i, email_file in enumerate(email_files, 1):
        print(f"\n\n======= 邮件 {i}/{len(email_files)} =======")
        print(f"文件: {email_file}")
        decode_eml_file(email_file)


def _locate_default_dir():
    """Return the default email directory, or None after reporting it missing."""
    if os.path.isdir(_DEFAULT_EMAIL_DIR):
        return _DEFAULT_EMAIL_DIR
    print(f"错误: 找不到邮件数据目录 {os.path.abspath(_DEFAULT_EMAIL_DIR)}")
    return None


def main():
    """Command-line entry point.

    Returns:
        Process exit status: 0 on success, 1 on any error.
    """
    parser = argparse.ArgumentParser(description='邮件解码工具')
    parser.add_argument('path', nargs='?', help='邮件文件路径或目录路径')
    parser.add_argument('--all', action='store_true', help='解码所有找到的邮件')

    args = parser.parse_args()

    # --all: scan the default email directory.
    if args.all:
        email_data_dir = _locate_default_dir()
        if email_data_dir is None:
            return 1
        decode_all_emails(email_data_dir)
        return 0

    # Explicit path: either a single file or a directory.
    if args.path:
        if os.path.isfile(args.path):
            return 0 if decode_eml_file(args.path) else 1
        if os.path.isdir(args.path):
            decode_all_emails(args.path)
            return 0
        print(f"错误: 路径不存在 {args.path}")
        return 1

    # No arguments: decode the newest file in the default directory.
    email_data_dir = _locate_default_dir()
    if email_data_dir is None:
        print("请指定邮件文件路径: python decode_email.py <邮件文件路径>")
        return 1

    candidates = _find_eml_files(email_data_dir)
    if not candidates:
        print(f"找不到任何.eml文件在 {email_data_dir}")
        return 1

    latest_email = max(candidates, key=os.path.getmtime)
    print(f"解析最新的邮件文件: {latest_email}")
    return 0 if decode_eml_file(latest_email) else 1


if __name__ == "__main__":
    sys.exit(main())