# emailsystem/app/services/email_processor.py

import logging
import re
import threading
import time
from queue import Empty, Queue

logger = logging.getLogger(__name__)

class EmailProcessor:
    """Process queued emails on a background thread and extract verification data.

    Email ids are enqueued with :meth:`queue_email_for_processing` and consumed
    by a single daemon worker thread started by :meth:`start`. The class also
    exposes two static helpers that pull a verification code or a verification
    link out of raw text.
    """

    # Seconds the worker blocks on the queue before re-checking ``is_running``.
    _QUEUE_POLL_SECONDS = 1.0
    # Seconds ``stop`` waits for the worker thread to exit.
    _STOP_JOIN_SECONDS = 5.0

    # Patterns where an explicit label precedes the code. They are tried first
    # because a labelled match is far more reliable than a bare token.
    _LABELED_CODE_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'验证码[：:]\s*([A-Z0-9]{4,8})',
            r'验证码是[：:]\s*([A-Z0-9]{4,8})',
            r'code[：:]\s*([A-Z0-9]{4,8})',
            r'码[：:]\s*(\d{4,8})',  # digits-only code
        )
    )

    # Last-resort pattern: a standalone run of 4-8 upper-case letters/digits.
    # Deliberately case-sensitive so ordinary words ("Your", "please") are not
    # mistaken for codes, and bounded by ASCII look-arounds instead of ``\b``
    # so a code directly adjacent to CJK text is still found.
    _BARE_CODE_PATTERN = re.compile(
        r'(?<![A-Za-z0-9])[A-Z0-9]{4,8}(?![A-Za-z0-9])'
    )

    # URL characters stop at whitespace, quotes and angle brackets so that
    # surrounding HTML markup is not captured as part of the link.
    _LINK_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'(https?://[^\s"\'<>]*(?:verify|confirm|activate)[^\s"\'<>]*)',
            r'(https?://[^\s"\'<>]*(?:token|auth|account)[^\s"\'<>]*)',
            r'href\s*=\s*["\']([^"\']+(?:verify|confirm|activate)[^"\']*)["\']',
        )
    )

    # Sentence punctuation that may trail a URL in running text.
    _LINK_TRAILING_PUNCTUATION = '.,;:!?。，；：！？'

    def __init__(self, mail_store):
        """Create a processor bound to a mail store.

        Args:
            mail_store: Mail storage service instance. It must provide
                ``get_email_by_id(email_id, mark_as_read=...)``.
        """
        self.mail_store = mail_store
        self.processing_queue = Queue()
        self.is_running = False
        self.worker_thread = None

    def start(self):
        """Start the background worker thread.

        Returns:
            ``True`` if the worker was started, ``False`` if the processor
            was already running.
        """
        if self.is_running:
            logger.warning("邮件处理器已在运行")
            return False

        self.is_running = True
        self.worker_thread = threading.Thread(
            target=self._processing_worker,
            daemon=True,
        )
        self.worker_thread.start()
        logger.info("邮件处理器已启动")
        return True

    def stop(self):
        """Signal the worker to stop and wait briefly for it to exit.

        Returns:
            ``True`` if a stop was requested, ``False`` if the processor was
            not running.
        """
        if not self.is_running:
            logger.warning("邮件处理器未在运行")
            return False

        self.is_running = False
        worker = self.worker_thread
        if worker:
            worker.join(timeout=self._STOP_JOIN_SECONDS)
            if worker.is_alive():
                # join() timed out: the thread is still busy with an email.
                # It exits once that email finishes because is_running is
                # already False; make the situation visible in the logs.
                logger.warning("邮件处理器工作线程未能在超时时间内退出")
            self.worker_thread = None

        logger.info("邮件处理器已停止")
        return True

    def queue_email_for_processing(self, email_id):
        """Add an email id to the processing queue.

        Args:
            email_id: Identifier later passed to the mail store lookup.

        Returns:
            Always ``True``.
        """
        self.processing_queue.put(email_id)
        return True

    def _processing_worker(self):
        """Worker loop: drain the queue until ``is_running`` becomes false."""
        while self.is_running:
            try:
                # Block for a bounded time so the loop re-checks is_running.
                email_id = self.processing_queue.get(
                    timeout=self._QUEUE_POLL_SECONDS
                )
            except Empty:
                continue

            try:
                self._process_email(email_id)
            except Exception as e:
                # Keep the worker alive; logger.exception records the traceback.
                logger.exception("处理邮件时出错: %s", e)
            finally:
                # Always balance get() with task_done(), even on failure, so a
                # caller blocked in processing_queue.join() is not left hanging.
                self.processing_queue.task_done()

    def _process_email(self, email_id):
        """Process one email by id.

        Looks the email up in the mail store without marking it as read.

        Args:
            email_id: Identifier of the email to process.

        Returns:
            ``True`` if the email was found, ``False`` otherwise.
        """
        email_data = self.mail_store.get_email_by_id(email_id, mark_as_read=False)
        if not email_data:
            logger.warning("找不到ID为 %s 的邮件", email_id)
            return False

        # NOTE: per the original author's comment, code/link extraction is
        # already done by the Email model's extract_verification_data method
        # (not visible here); more elaborate extraction or post-processing
        # can be added at this point.

        logger.info("邮件 %s 处理完成", email_id)
        return True

    @staticmethod
    def extract_verification_code(content):
        """Extract a verification code from text.

        Labelled forms (``验证码:``, ``code:``, ``码:``) are tried first. If none
        matches, the first standalone token of 4-8 upper-case letters/digits
        is returned.

        Args:
            content: Text to search. ``None`` or empty text yields ``None``.

        Returns:
            The first matching code as a string, or ``None`` if nothing matched.
        """
        if not content:
            return None

        for pattern in EmailProcessor._LABELED_CODE_PATTERNS:
            match = pattern.search(content)
            if match:
                return match.group(1)

        fallback = EmailProcessor._BARE_CODE_PATTERN.search(content)
        return fallback.group(0) if fallback else None

    @staticmethod
    def extract_verification_link(content):
        """Extract a verification link from text or HTML.

        Absolute URLs containing ``verify``/``confirm``/``activate`` are
        preferred, then URLs containing ``token``/``auth``/``account``, then
        any ``href`` attribute value containing ``verify``/``confirm``/
        ``activate`` (which may be a relative link).

        Args:
            content: Text to search. ``None`` or empty text yields ``None``.

        Returns:
            The first matching link with trailing sentence punctuation
            removed, or ``None`` if nothing matched.
        """
        if not content:
            return None

        for pattern in EmailProcessor._LINK_PATTERNS:
            match = pattern.search(content)
            if match:
                return match.group(1).rstrip(
                    EmailProcessor._LINK_TRAILING_PUNCTUATION
                )

        return None