diff --git a/decode_email.py b/decode_email.py
new file mode 100644
index 0000000..ed30ce0
--- /dev/null
+++ b/decode_email.py
@@ -0,0 +1,265 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+邮件解码工具
+用于解析.eml文件并显示可读的邮件内容
+"""
+
+import base64
+import sys
+import re
+import email
+from email import policy
+import os
+import argparse
+import html
+
# Patterns run against raw HTML, where the markup itself hints at the code
# (a letter-spaced element, or a <div> containing only digits).
_HTML_MARKUP_CODE_PATTERNS = (
    r'letter-spacing:\s*\d+px[^>]*>([^<]+)<',
    r'<div[^>]*>(\d{4,8})</div>',
)

# Patterns run against human-visible text: a Chinese label followed by an
# ASCII or full-width colon, or the English word "code" in any letter case.
_LABELLED_CODE_PATTERNS = (
    r'验证码[::]\s*([A-Z0-9]{4,8})',
    r'(?i:code)[^\d]+(\d{4,8})',
)

# Last-resort patterns, tried only after every labelled pattern has failed.
_HTML_FALLBACK_CODE_PATTERN = r'\b([A-Z0-9]{6})\b'
_TEXT_FALLBACK_CODE_PATTERN = r'\b(\d{6})\b'


def _decode_part_text(part, error_label):
    """Return the content of *part* as ``str``, or ``""`` if it is not text.

    If ``get_content()`` fails, *error_label* and the error are printed and
    the raw payload is decoded with the declared charset (UTF-8 when the
    charset is missing or unknown), replacing undecodable bytes.
    """
    try:
        content = part.get_content()
    except (LookupError, ValueError, TypeError) as e:
        print(f"{error_label}: {str(e)}")
        payload = part.get_payload(decode=True)
        if not isinstance(payload, bytes) or not payload:
            return ""
        charset = part.get_content_charset() or 'utf-8'
        try:
            return payload.decode(charset, errors='replace')
        except LookupError:
            # The declared charset is not known to Python.
            return payload.decode('utf-8', errors='replace')
    # Non-text parts yield bytes (or other objects); they cannot be searched
    # with the str regexes used later, so they are treated as "no text".
    return content if isinstance(content, str) else ""


def _extract_bodies(msg):
    """Return ``(body_text, body_html)`` for the parsed message *msg*."""
    if not msg.is_multipart():
        content_type = msg.get_content_type()
        if content_type not in ("text/plain", "text/html"):
            print(f"未知内容类型: {content_type}")
        content = _decode_part_text(msg, "解析邮件内容出错")
        if content_type == "text/html":
            return "", content
        return content, ""

    body_text = ""
    body_html = ""
    # walk() descends into nested multipart containers (for example a
    # multipart/alternative inside a multipart/mixed).
    for part in msg.walk():
        if part.is_multipart():
            continue
        if part.get_content_disposition() == "attachment":
            continue
        content_type = part.get_content_type()
        # The first part of each kind wins, so later parts do not overwrite it.
        if content_type == "text/plain" and not body_text:
            body_text = _decode_part_text(part, "解析纯文本内容出错")
        elif content_type == "text/html" and not body_html:
            body_html = _decode_part_text(part, "解析HTML内容出错")
    return body_text, body_html


def _first_code_match(patterns, text):
    """Return the first non-blank capture of any pattern in *text*, else None."""
    for pattern in patterns:
        for candidate in re.findall(pattern, text):
            candidate = candidate.strip()
            if candidate:
                return candidate
    return None


def _visible_text(body_html):
    """Return *body_html* with script/style blocks and tags removed."""
    without_blocks = re.sub(
        r'(?is)<(script|style)\b.*?</\1\s*>', ' ', body_html
    )
    without_tags = re.sub(r'<[^>]+>', ' ', without_blocks)
    return html.unescape(without_tags)


def _find_verification_code(body_text, body_html):
    """Return the most likely verification code, or None if nothing matches."""
    if body_html:
        code = _first_code_match(_HTML_MARKUP_CODE_PATTERNS, body_html)
        if code:
            return code
        # Text-level patterns run on the tag-stripped text so that attribute
        # values such as CSS colours ("#333333") are not mistaken for a code.
        code = _first_code_match(
            _LABELLED_CODE_PATTERNS + (_HTML_FALLBACK_CODE_PATTERN,),
            _visible_text(body_html),
        )
        if code:
            return code
    if body_text:
        return _first_code_match(
            _LABELLED_CODE_PATTERNS + (_TEXT_FALLBACK_CODE_PATTERN,),
            body_text,
        )
    return None


def _find_verification_link(body_text, body_html):
    """Return the first link containing verify/confirm/activate, or None."""
    if body_html:
        link_match = re.search(
            r'href=[\'"]([^\'"]*(?:verify|confirm|activate)[^\'"]*)[\'"]',
            body_html,
        )
        if link_match:
            # Attribute values are HTML-escaped ("&amp;"); undo that so the
            # printed URL is usable as-is.
            return html.unescape(link_match.group(1))
    if body_text:
        link_match = re.search(
            r'https?://\S+?(?:verify|confirm|activate)\S*', body_text
        )
        if link_match:
            return link_match.group(0)
    return None


def decode_eml_file(filename):
    """Parse an .eml file and print its headers, bodies and extracted data.

    The file is parsed from bytes so that each part is decoded with its own
    declared charset. The plain-text body is printed in full; the HTML body
    is truncated to 500 characters. If a verification code or verification
    link can be found, it is printed under a result header.

    Args:
        filename: Path of the .eml file to read.

    Returns:
        True if the file was read and displayed, False if it does not exist
        or could not be read.
    """
    print(f"解析邮件文件: {filename}")

    if not os.path.exists(filename):
        print(f"错误: 文件不存在 {filename}")
        return False

    try:
        with open(filename, 'rb') as f:
            msg = email.message_from_binary_file(f, policy=policy.default)
    except OSError as e:
        print(f"读取文件错误: {str(e)}")
        return False

    print("\n===== 邮件头信息 =====")
    print(f"主题: {msg.get('Subject', '无主题')}")
    print(f"发件人: {msg.get('From', '未知')}")
    print(f"收件人: {msg.get('To', '未知')}")
    print(f"日期: {msg.get('Date', '未知')}")

    print("\n===== 邮件内容 =====")
    body_text, body_html = _extract_bodies(msg)

    if body_text:
        print("\n----- 纯文本内容 -----")
        print(body_text)

    if body_html:
        print("\n----- HTML内容摘要 -----")
        # HTML bodies are usually long; show only the first 500 characters.
        if len(body_html) > 500:
            print(body_html[:500] + "...")
        else:
            print(body_html)

    verification_code = _find_verification_code(body_text, body_html)
    verification_link = _find_verification_link(body_text, body_html)

    if verification_code or verification_link:
        print("\n===== 提取结果 =====")
    if verification_code:
        print(f"验证码: {verification_code}")
    if verification_link:
        print(f"验证链接: {verification_link}")

    return True
+
def decode_all_emails(directory, limit=5):
    """Decode the most recently modified .eml files under *directory*.

    The directory tree is walked recursively, files whose name ends in
    ``.eml`` (in any letter case) are collected and sorted newest first by
    modification time, and each of the first *limit* files is passed to
    ``decode_eml_file``.

    Args:
        directory: Root directory to scan.
        limit: Maximum number of files to decode. Pass ``None`` to decode
            every file found. Defaults to 5.

    Returns:
        None. All output is printed.
    """
    print(f"扫描目录: {directory}")

    email_files = []
    for root, _dirs, names in os.walk(directory):
        email_files.extend(
            os.path.join(root, name)
            for name in names
            if name.lower().endswith(".eml")
        )

    def _mtime(path):
        # A file can disappear between the walk and the stat call; sort such
        # files last instead of aborting the whole scan.
        try:
            return os.path.getmtime(path)
        except OSError:
            return 0.0

    email_files.sort(key=_mtime, reverse=True)

    print(f"找到 {len(email_files)} 个邮件文件")

    # When there are too many files, show only the newest ones.
    if limit is not None and len(email_files) > limit:
        print(f"只显示最新的{limit}封邮件")
        email_files = email_files[:limit]

    total = len(email_files)
    for i, email_file in enumerate(email_files, 1):
        print(f"\n\n======= 邮件 {i}/{total} =======")
        print(f"文件: {email_file}")
        decode_eml_file(email_file)
+
def main():
    """Command-line entry point; return the process exit status.

    Behaviour, in priority order:

    1. If a positional path is given, it is honoured even when ``--all`` is
       also present: a file is decoded with ``decode_eml_file``, a directory
       is scanned with ``decode_all_emails``, and anything else is an error.
    2. Otherwise the ``email_data`` directory (relative to the current
       working directory) is used. With ``--all`` it is scanned with
       ``decode_all_emails``; without it only the most recently modified
       .eml file is decoded.

    Returns:
        0 on success, 1 if the path or data directory is missing, no .eml
        file is found, or decoding the selected file fails.
    """
    parser = argparse.ArgumentParser(description='邮件解码工具')
    parser.add_argument('path', nargs='?', help='邮件文件路径或目录路径')
    parser.add_argument('--all', action='store_true', help='解码所有找到的邮件')

    args = parser.parse_args()

    # An explicit path always wins, so it is never silently ignored.
    if args.path:
        if os.path.isfile(args.path):
            return 0 if decode_eml_file(args.path) else 1
        if os.path.isdir(args.path):
            decode_all_emails(args.path)
            return 0
        print(f"错误: 路径不存在 {args.path}")
        return 1

    # No path given: fall back to the default data directory. A relative
    # path already resolves against the current working directory, so no
    # second lookup is needed; the absolute form is only used in the message.
    email_data_dir = 'email_data'
    if not os.path.isdir(email_data_dir):
        print(f"错误: 找不到邮件数据目录 {os.path.abspath(email_data_dir)}")
        if not args.all:
            print("请指定邮件文件路径: python decode_email.py <邮件文件路径>")
        return 1

    if args.all:
        decode_all_emails(email_data_dir)
        return 0

    # Pick the most recently modified .eml file in the data directory tree.
    candidates = []
    for root, _dirs, names in os.walk(email_data_dir):
        candidates.extend(
            os.path.join(root, name)
            for name in names
            if name.lower().endswith(".eml")
        )

    def _mtime(path):
        # Tolerate files that vanish between the walk and the stat call.
        try:
            return os.path.getmtime(path)
        except OSError:
            return 0.0

    latest_email = max(candidates, key=_mtime, default=None)

    if latest_email:
        print(f"解析最新的邮件文件: {latest_email}")
        return 0 if decode_eml_file(latest_email) else 1

    print(f"找不到任何.eml文件在 {email_data_dir}")
    return 1
+
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
\ No newline at end of file