import logging
import re
import threading
import time
from queue import Empty, Queue
from typing import Optional

logger = logging.getLogger(__name__)

# Patterns that anchor the code to an explicit label ("验证码:", "code:", ...).
# They are tried before the generic fallback because the fallback, being
# case-insensitive, also matches any ordinary 4-8 letter word.
_CONTEXTUAL_CODE_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'验证码[::]\s*([A-Z0-9]{4,8})',
        r'验证码是[::]\s*([A-Z0-9]{4,8})',
        r'code[::]\s*([A-Z0-9]{4,8})',
        r'码[::]\s*(\d{4,8})',  # digits-only code
    )
)

# Generic fallback: any stand-alone 4-8 character alphanumeric token.
_GENERIC_CODE_PATTERN = re.compile(r'\b[A-Z0-9]{4,8}\b', re.IGNORECASE)

# Absolute URLs found anywhere in the text. Quotes and angle brackets are
# excluded from the URL so HTML residue such as '">Click</a>' is not captured.
_ABSOLUTE_LINK_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'(https?://[^\s"\'<>]*(?:verify|confirm|activate)[^\s"\'<>]*)',
        r'(https?://[^\s"\'<>]*(?:token|auth|account)[^\s"\'<>]*)',
    )
)

# Quoted href attribute values; this also covers relative links.
_HREF_LINK_PATTERN = re.compile(
    r'href\s*=\s*["\']([^"\']+(?:verify|confirm|activate)[^"\']*)["\']',
    re.IGNORECASE,
)

# Sentence punctuation that commonly trails a URL written in running text.
_TRAILING_URL_PUNCTUATION = '.,;:!?)]},。;:!?)】'


class EmailProcessor:
    """Process queued emails on a background thread and extract verification data."""

    def __init__(self, mail_store):
        """Create a processor bound to a mail store.

        Args:
            mail_store: Mail storage service instance. It must provide
                ``get_email_by_id(email_id, mark_as_read=...)``.
        """
        self.mail_store = mail_store
        self.processing_queue = Queue()
        self.is_running = False
        self.worker_thread = None

    def start(self) -> bool:
        """Start the background worker thread.

        Returns:
            True if the worker was started, False if it was already running.
        """
        if self.is_running:
            logger.warning("邮件处理器已在运行")
            return False

        self.is_running = True
        self.worker_thread = threading.Thread(
            target=self._processing_worker,
            daemon=True,
        )
        self.worker_thread.start()
        logger.info("邮件处理器已启动")
        return True

    def stop(self) -> bool:
        """Signal the worker to stop and wait up to 5 seconds for it to exit.

        Returns:
            True if a stop was requested, False if the processor was not running.
        """
        if not self.is_running:
            logger.warning("邮件处理器未在运行")
            return False

        self.is_running = False
        worker = self.worker_thread
        if worker:
            worker.join(timeout=5.0)
            if worker.is_alive():
                # join() timed out: the worker is still busy with an email.
                logger.warning("邮件处理线程未能在超时时间内退出")
            self.worker_thread = None

        logger.info("邮件处理器已停止")
        return True

    def queue_email_for_processing(self, email_id) -> bool:
        """Add an email id to the processing queue.

        Returns:
            Always True.
        """
        self.processing_queue.put(email_id)
        return True

    def _processing_worker(self) -> None:
        """Worker loop: drain the queue until ``is_running`` becomes False."""
        while self.is_running:
            try:
                # Wait at most 1 second so the loop re-checks is_running.
                email_id = self.processing_queue.get(timeout=1.0)
            except Empty:
                continue

            try:
                self._process_email(email_id)
            except Exception as e:
                # Keep the worker alive; one bad email must not stop the loop.
                logger.exception("处理邮件时出错: %s", e)
            finally:
                # Always balance the get() so Queue.join() cannot hang after
                # a processing failure.
                self.processing_queue.task_done()

    def _process_email(self, email_id) -> bool:
        """Process a single email.

        Args:
            email_id: Identifier passed to ``mail_store.get_email_by_id``.

        Returns:
            True if the email was found, False otherwise.
        """
        email_data = self.mail_store.get_email_by_id(email_id, mark_as_read=False)
        if not email_data:
            logger.warning("找不到ID为 %s 的邮件", email_id)
            return False

        # Code/link extraction is already implemented in the Email model's
        # extract_verification_data method; more complex extraction or
        # post-processing can be added here.

        logger.info("邮件 %s 处理完成", email_id)
        return True

    @staticmethod
    def extract_verification_code(content: Optional[str]) -> Optional[str]:
        """Extract a verification code from text.

        Labelled codes (e.g. ``验证码:1234`` or ``code: AB12``) are preferred.
        Otherwise the first stand-alone 4-8 character alphanumeric token that
        contains a digit is returned, and failing that the first such token.

        Args:
            content: Text to search; may be empty or None.

        Returns:
            The extracted code, or None if nothing matched.
        """
        if not content:
            return None

        for pattern in _CONTEXTUAL_CODE_PATTERNS:
            match = pattern.search(content)
            if match:
                return match.group(1)

        candidates = _GENERIC_CODE_PATTERN.findall(content)
        for candidate in candidates:
            # A token with a digit is far more likely to be a code than a word.
            if any(ch.isdigit() for ch in candidate):
                return candidate
        return candidates[0] if candidates else None

    @staticmethod
    def extract_verification_link(content: Optional[str]) -> Optional[str]:
        """Extract a verification link from text or HTML.

        Absolute URLs containing verify/confirm/activate are tried first, then
        URLs containing token/auth/account, then quoted ``href`` values
        containing verify/confirm/activate (which may be relative).

        Args:
            content: Text or HTML to search; may be empty or None.

        Returns:
            The extracted link, or None if nothing matched.
        """
        if not content:
            return None

        for pattern in _ABSOLUTE_LINK_PATTERNS:
            match = pattern.search(content)
            if match:
                # Drop punctuation that belongs to the sentence, not the URL.
                return match.group(1).rstrip(_TRAILING_URL_PUNCTUATION)

        match = _HREF_LINK_PATTERN.search(content)
        if match:
            return match.group(1)
        return None