# emailsystem/app/services/mail_store.py

import logging
import os
import email
from email.policy import default
from sqlalchemy.orm import Session
from datetime import datetime
import re

from ..models.domain import Domain
from ..models.mailbox import Mailbox
from ..models.email import Email
from ..models.attachment import Attachment

logger = logging.getLogger(__name__)

class MailStore:
    """邮件存储服务，负责保存和检索邮件"""

    def __init__(self, db_session_factory, storage_path=None):
        """
        初始化邮件存储服务

        参数:
            db_session_factory: 数据库会话工厂函数
            storage_path: 附件存储路径
        """
        self.db_session_factory = db_session_factory
        self.storage_path = storage_path or os.path.join(os.getcwd(), 'email_data')

        # 确保存储目录存在
        if not os.path.exists(self.storage_path):
            os.makedirs(self.storage_path)

    async def save_email(self, message, sender, recipients, raw_data=None):
        """
        保存邮件到数据库

        Args:
            message: 已解析的邮件对象
            sender: 发件人邮箱
            recipients: 收件人邮箱列表
            raw_data: 原始邮件数据

        Returns:
            (bool, str): 成功标志和错误信息
        """
        try:
            logging.info(f"开始保存邮件: 发件人={sender}, 收件人={recipients}")

            # 从消息对象中提取主题
            subject = message.get('Subject', '')
            if subject is None:
                subject = ''

            logging.info(f"邮件主题: {subject}")

            # 获取邮件内容（文本和HTML）
            body_text = ""
            body_html = ""
            attachments = []

            # 处理多部分邮件
            if message.is_multipart():
                logging.info("处理多部分邮件")
                for part in message.walk():
                    content_type = part.get_content_type()
                    content_disposition = str(part.get("Content-Disposition") or "")
                    logging.debug(f"处理邮件部分: 类型={content_type}, 处置={content_disposition}")

                    # 处理附件
                    if "attachment" in content_disposition:
                        try:
                            filename = part.get_filename()
                            if filename:
                                payload = part.get_payload(decode=True)
                                if payload and len(payload) > 0:
                                    logging.info(f"发现附件: {filename}, 大小={len(payload)}字节")
                                    # 将附件信息添加到列表，稍后处理
                                    attachments.append({
                                        'filename': filename,
                                        'content_type': content_type,
                                        'data': payload
                                    })
                        except Exception as e:
                            logging.error(f"处理附件时出错: {str(e)}")
                            continue

                    # 处理内容部分
                    elif content_type == "text/plain" and not body_text:
                        try:
                            payload = part.get_payload(decode=True)
                            if payload:
                                charset = part.get_content_charset() or 'utf-8'
                                try:
                                    body_text = payload.decode(charset, errors='replace')
                                    logging.info(f"提取到纯文本内容: {len(body_text)}字节")
                                except Exception as e:
                                    logging.error(f"解码纯文本内容失败: {e}")
                                    body_text = payload.decode('utf-8', errors='replace')
                        except Exception as e:
                            logging.error(f"获取纯文本部分时出错: {str(e)}")

                    elif content_type == "text/html" and not body_html:
                        try:
                            payload = part.get_payload(decode=True)
                            if payload:
                                charset = part.get_content_charset() or 'utf-8'
                                try:
                                    body_html = payload.decode(charset, errors='replace')
                                    logging.info(f"提取到HTML内容: {len(body_html)}字节")
                                except Exception as e:
                                    logging.error(f"解码HTML内容失败: {e}")
                                    body_html = payload.decode('utf-8', errors='replace')
                        except Exception as e:
                            logging.error(f"获取HTML部分时出错: {str(e)}")

            # 处理单部分邮件
            else:
                logging.info("处理单部分邮件")
                content_type = message.get_content_type()
                logging.debug(f"单部分邮件类型: {content_type}")

                try:
                    payload = message.get_payload(decode=True)
                    if payload:
                        charset = message.get_content_charset() or 'utf-8'
                        logging.debug(f"邮件编码: {charset}")
                        try:
                            decoded_content = payload.decode(charset, errors='replace')
                        except Exception as e:
                            logging.error(f"解码内容失败: {e}")
                            decoded_content = payload.decode('utf-8', errors='replace')

                        if content_type == 'text/plain':
                            body_text = decoded_content
                            logging.info(f"提取到纯文本内容: {len(body_text)}字节")
                        elif content_type == 'text/html':
                            body_html = decoded_content
                            logging.info(f"提取到HTML内容: {len(body_html)}字节")
                        else:
                            logging.warning(f"未知内容类型: {content_type}")
                            # 假设为纯文本
                            body_text = decoded_content
                except Exception as e:
                    logging.error(f"获取邮件内容时出错: {str(e)}")

            # 如果仍然没有内容，尝试从原始数据中提取
            if not body_text and not body_html and raw_data:
                logging.info("从原始数据中提取内容")
                try:
                    # 简单提取，可能不适用于所有情况
                    if '<html>' in raw_data.lower():
                        body_html = raw_data
                    else:
                        body_text = raw_data
                except Exception as e:
                    logging.error(f"从原始数据提取内容失败: {str(e)}")

            logging.info(f"提取完成: 纯文本={len(body_text)}字节, HTML={len(body_html)}字节, 附件数={len(attachments)}")

            # 保存到数据库
            session = self.db_session_factory()
            try:
                # 查找收件人对应的邮箱
                mailbox_id = None
                recipients_list = recipients if isinstance(recipients, list) else [recipients]

                for recipient in recipients_list:
                    # 提取域名和用户名
                    if '@' in recipient:
                        username, domain = recipient.split('@', 1)
                        logging.info(f"查找邮箱: 用户名={username}, 域名={domain}")

                        # 查询域名
                        domain_obj = session.query(Domain).filter(Domain.name == domain).first()
                        if domain_obj:
                            # 查询邮箱
                            mailbox = session.query(Mailbox).filter(
                                Mailbox.domain_id == domain_obj.id,
                                Mailbox.address == username
                            ).first()

                            if mailbox:
                                mailbox_id = mailbox.id
                                logging.info(f"找到邮箱ID: {mailbox_id}")
                                break

                if not mailbox_id:
                    logging.error(f"收件人 {recipients} 没有对应的邮箱记录")
                    return False, "收件人邮箱不存在"

                # 创建新邮件记录
                new_email = Email(
                    mailbox_id=mailbox_id,  # 设置邮箱ID
                    subject=subject,
                    sender=sender,
                    recipients=','.join(recipients_list) if len(recipients_list) > 1 else recipients_list[0],
                    body_text=body_text,
                    body_html=body_html,
                    received_at=datetime.now()
                )

                # 提取验证码和验证链接（如果有）
                new_email.extract_verification_data()

                # 保存邮件
                session.add(new_email)
                session.commit()
                email_id = new_email.id
                logging.info(f"邮件保存到数据库, ID={email_id}")

                # 处理附件
                if attachments:
                    for attachment_data in attachments:
                        attachment = Attachment(
                            email_id=email_id,
                            filename=attachment_data['filename'],
                            content_type=attachment_data['content_type'],
                            size=len(attachment_data['data']),
                            data=attachment_data['data']
                        )
                        session.add(attachment)
                    session.commit()
                    logging.info(f"保存了{len(attachments)}个附件")

                # 保存原始邮件到文件系统
                try:
                    if raw_data and email_id:
                        email_dir = os.path.join(self.storage_path, 'emails')
                        os.makedirs(email_dir, exist_ok=True)
                        email_path = os.path.join(email_dir, f'email_{email_id}.eml')
                        with open(email_path, 'w', encoding='utf-8') as f:
                            f.write(raw_data)
                        logging.info(f"原始邮件保存到: {email_path}")
                except Exception as e:
                    logging.error(f"保存原始邮件到文件系统失败: {str(e)}")

                return True, f"邮件保存成功，ID: {email_id}"
            except Exception as e:
                logging.error(f"保存邮件到数据库失败: {str(e)}")
                return False, f"保存邮件失败: {str(e)}"
            finally:
                session.close()
        except Exception as e:
            logging.error(f"保存邮件时出现未处理异常: {str(e)}")
            import traceback
            traceback.print_exc()
            return False, f"保存邮件过程中出错: {str(e)}"

    def get_emails_for_mailbox(self, mailbox_id, limit=50, offset=0, unread_only=False):
        """
        Return one page of a mailbox's e-mails, newest first.

        Args:
            mailbox_id: ID of the mailbox whose e-mails are listed.
            limit: Maximum number of e-mails in the page.
            offset: Number of e-mails to skip before the page starts.
            unread_only: When true, only e-mails whose ``read`` flag is false
                are counted and returned.

        Returns:
            dict: ``{'total': <matching rows before paging>, 'items': [...]}``
            where each item is ``Email.to_dict()``. On any error the failure
            is logged and ``{'total': 0, 'items': []}`` is returned.
        """
        session = self.db_session_factory()
        try:
            matching = session.query(Email).filter(Email.mailbox_id == mailbox_id)
            if unread_only:
                # SQL expression, not a Python comparison.
                matching = matching.filter(Email.read == False)

            # Counted before paging so it covers every matching row.
            match_count = matching.count()

            newest_first = matching.order_by(Email.received_at.desc())
            page = newest_first.limit(limit).offset(offset).all()

            serialized = []
            for record in page:
                serialized.append(record.to_dict())

            return {'total': match_count, 'items': serialized}
        except Exception as e:
            logger.error(f"获取邮件列表时出错: {str(e)}")
            return {'total': 0, 'items': []}
        finally:
            session.close()

    def get_email_by_id(self, email_id, mark_as_read=True):
        """
        Return the full details of one e-mail.

        Args:
            email_id: ID of the e-mail to load.
            mark_as_read: When true and the e-mail is still unread, set its
                ``read`` flag and ``last_read`` timestamp and commit.

        Returns:
            dict | None: ``Email.to_dict()`` extended with ``body_text``,
            ``body_html`` and ``attachments`` (a list of
            ``Attachment.to_dict()``). None when the e-mail does not exist or
            an error occurred (the error is logged and the session rolled
            back).
        """
        session = self.db_session_factory()
        try:
            record = session.query(Email).filter(Email.id == email_id).first()
            if not record:
                return None

            should_mark = mark_as_read and not record.read
            if should_mark:
                record.read = True
                record.last_read = datetime.now()
                session.commit()

            # Attachment metadata is serialized before the e-mail itself.
            attachment_dicts = []
            for item in record.attachments:
                attachment_dicts.append(item.to_dict())

            details = record.to_dict()
            details['body_text'] = record.body_text
            details['body_html'] = record.body_html
            details['attachments'] = attachment_dicts
            return details
        except Exception as e:
            session.rollback()
            logger.error(f"获取邮件详情时出错: {str(e)}")
            return None
        finally:
            session.close()

    def delete_email(self, email_id):
        """
        Delete the e-mail with the given ID, including its raw copy on disk.

        Args:
            email_id: ID of the e-mail to delete.

        Returns:
            bool: True when the database row was deleted. False when no such
            e-mail exists or the database operation failed (the error is
            logged and the session rolled back). A failure to remove the raw
            ``.eml`` file is only logged and does not change the result.
        """
        db = self.db_session_factory()
        try:
            record = db.query(Email).filter(Email.id == email_id).first()

            if not record:
                return False

            # Use the id stored in the row, not the caller's value, when
            # building the file path below.
            stored_id = record.id
            db.delete(record)
            db.commit()
        except Exception as e:
            db.rollback()
            logger.error(f"删除邮件时出错: {str(e)}")
            return False
        finally:
            db.close()

        # save_email() writes the raw message to
        # <storage_path>/emails/email_<id>.eml; remove it so deleted e-mails
        # do not leave orphaned files behind.
        raw_path = os.path.join(self.storage_path, 'emails', f'email_{stored_id}.eml')
        try:
            os.remove(raw_path)
        except FileNotFoundError:
            pass  # no raw copy was stored for this e-mail
        except OSError as e:
            logger.warning(f"删除原始邮件文件失败: {raw_path}: {e}")

        return True

    def get_attachment_content(self, attachment_id):
        """
        Load the content and metadata of one attachment.

        Args:
            attachment_id: ID of the attachment to load.

        Returns:
            dict | None: ``{'content', 'filename', 'content_type'}`` where
            ``content`` is whatever ``Attachment.get_content()`` returns.
            None when the attachment does not exist or an error occurred
            (the error is logged).
        """
        session = self.db_session_factory()
        try:
            lookup = session.query(Attachment).filter(Attachment.id == attachment_id)
            found = lookup.first()
            if not found:
                return None

            # Content is fetched first, then the metadata fields are read.
            payload = {'content': found.get_content()}
            payload['filename'] = found.filename
            payload['content_type'] = found.content_type
            return payload
        except Exception as e:
            logger.error(f"获取附件内容时出错: {str(e)}")
            return None
        finally:
            session.close()