#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Email decoding tool.

Parses .eml files, prints their headers and body in readable form, and makes a
best-effort attempt to extract a verification code and a verification link.
"""

import argparse
import base64
import email
import html
import os
import re
import sys
from email import policy

# Directory scanned when no explicit path is given on the command line.
DEFAULT_EMAIL_DIR = 'email_data'

# Number of HTML characters echoed to the console before truncating.
HTML_PREVIEW_LENGTH = 500

# Verification-code patterns tried against the HTML body, most specific first.
_HTML_CODE_PATTERNS = tuple(re.compile(p) for p in (
    # Codes are usually rendered with a distinctive letter-spacing style.
    r'letter-spacing:\s*\d+px[^>]*>([^<]+)<',
    # NOTE(review): this looks like a truncated `<div[^>]*>` prefix; as written
    # it matches 4-8 digits right after any '>'. Kept unchanged - confirm.
    r']*>(\d{4,8})',
    # Code introduced by the Chinese label (ASCII or full-width colon).
    r'验证码[::]\s*([A-Z0-9]{4,8})',
    # Code introduced by the English word "code".
    r'code[^\d]+(\d{4,8})',
    # Last resort: any 6-character run of upper-case letters/digits.
    r'\b([A-Z0-9]{6})\b',
))

# Verification-code patterns tried against the plain-text body.
_TEXT_CODE_PATTERNS = tuple(re.compile(p) for p in (
    r'验证码[::]\s*([A-Z0-9]{4,8})',
    r'code[^\d]+(\d{4,8})',
    r'\b(\d{6})\b',
))

_HTML_LINK_RE = re.compile(
    r'href=[\'"]([^\'"]*(?:verify|confirm|activate)[^\'"]*)[\'"]'
)
# Trailing `\S*` (not `\S+`) so that URLs ending in the keyword still match.
_TEXT_LINK_RE = re.compile(r'https?://\S+?(?:verify|confirm|activate)\S*')


def _part_text(part, error_label):
    """Return the content of *part* as ``str``; empty string if unavailable.

    ``get_content()`` is tried first. If it fails, *error_label* and the error
    are printed and the raw decoded payload is used instead. Bytes are decoded
    with the declared charset, falling back to UTF-8 for unknown charsets.
    """
    try:
        content = part.get_content()
    except Exception as exc:  # deliberate best-effort: report, then fall back
        print(f"{error_label}: {exc}")
        content = part.get_payload(decode=True)

    if isinstance(content, (bytes, bytearray)):
        charset = part.get_content_charset() or 'utf-8'
        try:
            content = content.decode(charset, errors='replace')
        except LookupError:
            # The declared charset is not known to Python.
            content = content.decode('utf-8', errors='replace')

    if not isinstance(content, str):
        # e.g. None for containers, or a message object for message/rfc822.
        return ""
    # The file is read in binary mode, so normalise CRLF for display.
    return content.replace('\r\n', '\n')


def _extract_bodies(msg):
    """Return ``(body_text, body_html)`` for *msg*.

    Multipart messages are walked recursively so bodies nested inside e.g.
    multipart/mixed -> multipart/alternative are found. Attachments are
    skipped and the first body of each kind wins.
    """
    body_text = ""
    body_html = ""

    if msg.is_multipart():
        for part in msg.walk():
            if part.is_multipart() or part.is_attachment():
                continue
            content_type = part.get_content_type()
            if content_type == "text/plain" and not body_text:
                body_text = _part_text(part, "解析纯文本内容出错")
            elif content_type == "text/html" and not body_html:
                body_html = _part_text(part, "解析HTML内容出错")
        return body_text, body_html

    content_type = msg.get_content_type()
    if content_type == "text/html":
        return "", _part_text(msg, "解析邮件内容出错")
    if content_type != "text/plain":
        # Unknown type: report it, then still try to treat it as plain text.
        print(f"未知内容类型: {content_type}")
    return _part_text(msg, "解析邮件内容出错"), ""


def _first_nonblank_match(patterns, text):
    """Return the first non-blank (stripped) match of *patterns* in *text*."""
    for pattern in patterns:
        for match in pattern.findall(text):
            candidate = match.strip()
            if candidate:
                return candidate
    return None


def _extract_verification_code(body_html, body_text):
    """Return a verification code from the HTML body, else the text body."""
    code = None
    if body_html:
        code = _first_nonblank_match(_HTML_CODE_PATTERNS, body_html)
    if not code and body_text:
        code = _first_nonblank_match(_TEXT_CODE_PATTERNS, body_text)
    return code


def _extract_verification_link(body_html, body_text):
    """Return a verify/confirm/activate link, or ``None`` if there is none."""
    if body_html:
        found = _HTML_LINK_RE.search(body_html)
        if found:
            # href values are HTML-escaped (e.g. `&amp;`); return a usable URL.
            return html.unescape(found.group(1))
    if body_text:
        found = _TEXT_LINK_RE.search(body_text)
        if found:
            return found.group(0)
    return None


def _find_eml_files(directory):
    """Return all .eml files under *directory* (recursive), newest first."""
    email_files = []
    for root, _dirs, files in os.walk(directory):
        for name in files:
            # Case-insensitive so that `.EML` files are picked up as well.
            if name.lower().endswith(".eml"):
                email_files.append(os.path.join(root, name))
    email_files.sort(key=os.path.getmtime, reverse=True)
    return email_files


def _default_email_dir():
    """Return the default mail directory path (it may not exist)."""
    if os.path.exists(DEFAULT_EMAIL_DIR):
        return DEFAULT_EMAIL_DIR
    return os.path.join(os.getcwd(), DEFAULT_EMAIL_DIR)


def decode_eml_file(filename: str) -> bool:
    """Parse one .eml file and print its headers, body and extracted data.

    Args:
        filename: Path of the .eml file to decode.

    Returns:
        True when the file was read and printed, False when it does not exist
        or cannot be read.
    """
    print(f"解析邮件文件: {filename}")

    if not os.path.exists(filename):
        print(f"错误: 文件不存在 {filename}")
        return False

    # Read as bytes: an .eml file may mix charsets, and decoding it up front
    # as UTF-8 text corrupts 8bit bodies before the parser sees them.
    try:
        with open(filename, 'rb') as f:
            msg = email.message_from_binary_file(f, policy=policy.default)
    except OSError as e:
        print(f"读取文件错误: {str(e)}")
        return False

    print("\n===== 邮件头信息 =====")
    print(f"主题: {msg.get('Subject', '无主题')}")
    print(f"发件人: {msg.get('From', '未知')}")
    print(f"收件人: {msg.get('To', '未知')}")
    print(f"日期: {msg.get('Date', '未知')}")

    print("\n===== 邮件内容 =====")
    body_text, body_html = _extract_bodies(msg)

    if body_text:
        print("\n----- 纯文本内容 -----")
        print(body_text)

    if body_html:
        # HTML bodies tend to be long, so only a preview is shown.
        print("\n----- HTML内容摘要 -----")
        if len(body_html) > HTML_PREVIEW_LENGTH:
            print(body_html[:HTML_PREVIEW_LENGTH] + "...")
        else:
            print(body_html)

    verification_code = _extract_verification_code(body_html, body_text)
    if verification_code:
        print("\n===== 提取结果 =====")
        print(f"验证码: {verification_code}")

    verification_link = _extract_verification_link(body_html, body_text)
    if verification_link:
        print(f"验证链接: {verification_link}")

    return True


def decode_all_emails(directory: str, limit=5) -> None:
    """Decode the newest .eml files found under *directory*.

    Args:
        directory: Directory that is scanned recursively for .eml files.
        limit: Maximum number of (newest) files to decode; ``None`` decodes
            every file found.
    """
    print(f"扫描目录: {directory}")
    email_files = _find_eml_files(directory)
    print(f"找到 {len(email_files)} 个邮件文件")

    if limit is not None and len(email_files) > limit:
        print(f"只显示最新的{limit}封邮件")
        email_files = email_files[:limit]

    for i, email_file in enumerate(email_files, 1):
        print(f"\n\n======= 邮件 {i}/{len(email_files)} =======")
        print(f"文件: {email_file}")
        decode_eml_file(email_file)


def main() -> int:
    """Command-line entry point; returns the process exit code (0 or 1)."""
    parser = argparse.ArgumentParser(description='邮件解码工具')
    parser.add_argument('path', nargs='?', help='邮件文件路径或目录路径')
    parser.add_argument('--all', action='store_true', help='解码所有找到的邮件')
    args = parser.parse_args()

    # --all: scan the default mail directory.
    if args.all:
        email_data_dir = _default_email_dir()
        if not os.path.exists(email_data_dir):
            print(f"错误: 找不到邮件数据目录 {email_data_dir}")
            return 1
        decode_all_emails(email_data_dir)
        return 0

    # Explicit path: either a single file or a directory.
    if args.path:
        if os.path.isfile(args.path):
            return 0 if decode_eml_file(args.path) else 1
        if os.path.isdir(args.path):
            decode_all_emails(args.path)
            return 0
        print(f"错误: 路径不存在 {args.path}")
        return 1

    # No path: decode the most recently modified mail in the default directory.
    email_data_dir = _default_email_dir()
    if not os.path.exists(email_data_dir):
        print(f"错误: 找不到邮件数据目录 {email_data_dir}")
        print("请指定邮件文件路径: python decode_email.py ")
        return 1

    email_files = _find_eml_files(email_data_dir)
    if not email_files:
        print(f"找不到任何.eml文件在 {email_data_dir}")
        return 1

    latest_email = email_files[0]
    print(f"解析最新的邮件文件: {latest_email}")
    return 0 if decode_eml_file(latest_email) else 1


if __name__ == "__main__":
    sys.exit(main())