# -*- coding: utf-8 -*-

- import time
- import imaplib
- import json
- import typing
- from email import message_from_bytes
- from email.header import decode_header
- from email.header import make_header
- from email.message import Message
- from email.utils import parseaddr
-
- import filelock
- import markdown
- import requests
- from email_reply_parser import EmailReplyParser
- from tracim.lib.base import logger
- from tracim.lib.email_processing.parser import ParsedHTMLMail
- from tracim.lib.email_processing.sanitizer import HtmlSanitizer
-
# Name of the custom mail header read first when looking for the content key
# of a mail.
TRACIM_SPECIAL_KEY_HEADER = 'X-Tracim-Key'
# MIME types of the body parts that can be converted into content.
CONTENT_TYPE_TEXT_PLAIN = 'text/plain'
CONTENT_TYPE_TEXT_HTML = 'text/html'

# IMAP flags used to track the state of a mail in the mailbox:
# "\Seen" is set as soon as a mail has been fetched, "\Flagged" once the
# endpoint has given a definitive answer for it.
IMAP_SEEN_FLAG = '\\Seen'
IMAP_CHECKED_FLAG = '\\Flagged'

class MessageContainer(object):
    """
    Pair a parsed mail with the identifier it has in the mailbox.

    :param message: parsed mail
    :param uid: identifier used to fetch the mail; it is kept so the same
    mail can be flagged in the mailbox later on
    """

    def __init__(self, message: Message, uid: int) -> None:
        self.message = message
        self.uid = uid


class DecodedMail(object):
    """
    Wrapper around a parsed mail giving access to the decoded values needed
    to turn the mail into content: sender, key and html body.

    :param message: parsed mail
    :param uid: identifier of the mail in the mailbox, kept so the mail can
    be flagged after processing
    """

    def __init__(self, message: Message, uid: int=None) -> None:
        self._message = message
        self.uid = uid

    def _decode_header(self, header_title: str) -> typing.Optional[str]:
        """
        Decode a possibly encoded (RFC 2047) header into a plain string.
        :param header_title: name of the header
        :return: decoded header value, None if the header is missing
        """
        # FIXME : Handle exception
        # (errors raised while decoding a malformed header are not caught
        # here and propagate to the caller)
        if header_title not in self._message:
            return None
        return str(make_header(decode_header(self._message[header_title])))

    def get_subject(self) -> typing.Optional[str]:
        """
        :return: decoded subject, None if there is no subject header
        """
        return self._decode_header('subject')

    def get_from_address(self) -> str:
        """
        :return: address part of the 'From' header
        """
        return parseaddr(self._message['From'])[1]

    def get_to_address(self) -> str:
        """
        :return: address part of the 'To' header
        """
        return parseaddr(self._message['To'])[1]

    def get_first_ref(self) -> str:
        """
        :return: first (oldest) mail-id of the 'References' header without
        its angle brackets, empty string if there is none
        """
        references = self._message['References']
        if not references:
            return ''
        # INFO - 'References' holds several whitespace separated mail-ids.
        # Only the first one is given to parseaddr: recent python versions
        # return an empty result when parseaddr sees more than one address.
        mail_ids = str(references).split()
        if not mail_ids:
            return ''
        return parseaddr(mail_ids[0])[1]

    def get_special_key(self) -> typing.Optional[str]:
        """
        :return: decoded value of the special key header, None if missing
        """
        return self._decode_header(TRACIM_SPECIAL_KEY_HEADER)

    def get_body(
            self,
            use_html_parsing=True,
            use_txt_parsing=True,
    ) -> typing.Optional[str]:
        """
        Build sanitized html from the body of the mail.
        :param use_html_parsing: run html body through ParsedHTMLMail
        :param use_txt_parsing: run plain text body through EmailReplyParser
        :return: sanitized html, None if the mail has no usable body part
        """
        body_part = self._get_mime_body_message()
        # INFO - Compare with None: the length of a Message is its number of
        # headers, so a part without any header is falsy.
        if body_part is None:
            return None

        charset = body_part.get_content_charset('iso-8859-1')
        content_type = body_part.get_content_type()

        if content_type == CONTENT_TYPE_TEXT_PLAIN:
            txt_body = body_part.get_payload(decode=True).decode(charset)
            if use_txt_parsing:
                txt_body = EmailReplyParser.parse_reply(txt_body)
            html_body = markdown.markdown(txt_body)
            return HtmlSanitizer.sanitize(html_body)

        if content_type == CONTENT_TYPE_TEXT_HTML:
            html_body = body_part.get_payload(decode=True).decode(charset)
            if use_html_parsing:
                html_body = str(ParsedHTMLMail(html_body))
            return HtmlSanitizer.sanitize(html_body)

        return None

    def _get_mime_body_message(self) -> typing.Optional[Message]:
        """
        Find the part of the mail to use as body. Html parts are preferred
        over plain text ones, parts sent as attachment are ignored.
        :return: body part, None if the mail has no html or plain text part
        """
        # TODO - G.M - 2017-11-16 - Use stdlib msg.get_body feature for py3.6+
        for wanted_type in (CONTENT_TYPE_TEXT_HTML, CONTENT_TYPE_TEXT_PLAIN):
            for part in self._message.walk():
                content_dispo = str(part.get('Content-Disposition'))
                if part.get_content_type() == wanted_type \
                        and 'attachment' not in content_dispo:
                    return part
        # No usable text part: do not hand back an unrelated part.
        return None

    def get_key(self) -> typing.Optional[str]:
        """
        key is the string contained in some mail header we need to retrieve.
        First try checking special header, then check 'to' header
        and finally check first(oldest) mail-id of 'references' header
        :return: key, None if no header provides one
        """
        special_key = self.get_special_key()
        if special_key:
            return special_key

        # A 'to' address without key must not hide the key carried by the
        # 'references' header, so every candidate is tried in turn.
        for address in (self.get_to_address(), self.get_first_ref()):
            if not address:
                continue
            key = DecodedMail.find_key_from_mail_address(address)
            if key is not None:
                return key

        return None

    @classmethod
    def find_key_from_mail_address(
            cls,
            mail_address: str,
    ) -> typing.Optional[str]:
        """ Parse mail_address-like string
        to retrieve key.

        :param mail_address: user+key@something like string
        :return: key, None if the part before '@' does not hold exactly
        one '+'
        """
        username = mail_address.split('@')[0]
        username_data = username.split('+')
        if len(username_data) == 2:
            return username_data[1]
        return None


class MailFetcher(object):
    """
    Poll a mailbox folder through IMAP and forward every unseen mail to an
    http endpoint.
    """

    def __init__(
            self,
            host: str,
            port: str,
            user: str,
            password: str,
            use_ssl: bool,
            folder: str,
            delay: int,
            endpoint: str,
            token: str,
            use_html_parsing: bool,
            use_txt_parsing: bool,
            filelock_path: str,
    ) -> None:
        """
        Fetch mail from a mailbox folder through IMAP and add their content to
        Tracim through http according to mail Headers.
        Fetch is regular.
        :param host: imap server hostname
        :param port: imap connection port
        :param user: user login of mailbox
        :param password: user password of mailbox
        :param use_ssl: use imap over ssl connection
        :param folder: mail folder where new mail are fetched
        :param delay: seconds to wait before fetching new mail again
        :param endpoint: tracim http endpoint where decoded mail are send.
        :param token: token to authenticate http connexion
        :param use_html_parsing: parse html mail
        :param use_txt_parsing: parse txt mail
        :param filelock_path: path of the file lock held while mails are
        fetched and sent
        """
        self._connection = None
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.use_ssl = use_ssl
        self.folder = folder
        self.delay = delay
        self.endpoint = endpoint
        self.token = token
        self.use_html_parsing = use_html_parsing
        self.use_txt_parsing = use_txt_parsing
        self.lock = filelock.FileLock(filelock_path)
        self._is_active = True

    def run(self) -> None:
        """
        Fetch and forward mails every `delay` seconds until stop() is
        called. Errors of a cycle are logged and the loop goes on.
        """
        logger.info(self, 'Starting MailFetcher')
        while self._is_active:
            logger.debug(self, 'sleep for {}'.format(self.delay))
            time.sleep(self.delay)
            if not self._is_active:
                # stop() has been called during the sleep
                break
            try:
                try:
                    self._connect()
                    with self.lock.acquire(timeout=10):
                        messages = self._fetch()
                        cleaned_mails = [DecodedMail(m.message, m.uid)
                                         for m in messages]
                        self._notify_tracim(cleaned_mails)
                finally:
                    # Always release the connection, even when the cycle
                    # failed, so the next one starts from a clean state.
                    self._disconnect()
            except Exception as e:
                # TODO - G.M - 2017-11-23 - Identify possible exceptions
                log = 'IMAP error: {}'
                logger.warning(self, log.format(e.__str__()))

    def stop(self) -> None:
        """
        Ask run() to leave its loop and drop the file lock object.
        """
        self._is_active = False
        del self.lock

    def _connect(self) -> None:
        """
        Open a new IMAP connection (closing the previous one if any) and
        log in.
        A login failure is logged then raised again, as the connection can
        not be used without being authenticated.
        """
        # TODO - G.M - 2017-11-15 Verify connection/disconnection
        # Are old connexion properly close this way ?
        if self._connection is not None:
            logger.debug(self, 'Disconnect from IMAP')
            self._disconnect()
        # TODO - G.M - 2017-11-23 Support for predefined SSLContext ?
        # without ssl_context param, tracim use default security configuration
        # which is great in most case.
        if self.use_ssl:
            logger.debug(self, 'Connect IMAP {}:{} using SSL'.format(
                self.host,
                self.port,
            ))
            self._connection = imaplib.IMAP4_SSL(self.host, self.port)
        else:
            logger.debug(self, 'Connect IMAP {}:{}'.format(
                self.host,
                self.port,
            ))
            self._connection = imaplib.IMAP4(self.host, self.port)

        try:
            logger.debug(self, 'Login IMAP with login {}'.format(
                self.user,
            ))
            self._connection.login(self.user, self.password)
        except Exception as e:
            log = 'IMAP login error: {}'
            logger.error(self, log.format(e.__str__()))
            raise

    def _disconnect(self) -> None:
        """
        Close the selected mailbox (if any) and log out.
        The connection is forgotten before anything else, so a failure
        during the shutdown can not leave a broken connection behind.
        """
        connection, self._connection = self._connection, None
        if connection is None:
            return
        try:
            connection.close()
        except imaplib.IMAP4.error:
            # CLOSE is refused when no mailbox is selected (failed login or
            # select): there is nothing to close then, only logout matters.
            pass
        finally:
            connection.logout()

    def _fetch(self) -> typing.List['MessageContainer']:
        """
        Get news message from mailbox
        Every fetched mail is flagged as seen.
        :return: list of new mails
        """
        messages = []
        # select mailbox
        logger.debug(self, 'Fetch messages from folder {}'.format(
            self.folder,
        ))
        rv, data = self._connection.select(self.folder)
        logger.debug(self, 'Response status {}'.format(
            rv,
        ))
        if rv != 'OK':
            log = 'IMAP : Unable to open mailbox : {}'
            logger.error(self, log.format(str(rv)))
            return messages

        # get mails
        # TODO - G.M - 2017-11-15 Which files to select as new file ?
        # Unseen file or All file from a directory (old one should be
        # moved/ deleted from mailbox during this process) ?
        logger.debug(self, 'Fetch unseen messages')
        rv, data = self._connection.search(None, "(UNSEEN)")
        logger.debug(self, 'Response status {}'.format(
            rv,
        ))
        if rv != 'OK':
            log = 'IMAP : Unable to get unseen mail : {}'
            logger.error(self, log.format(str(rv)))
            return messages

        # get mail content
        uids = (data[0] or b'').split()
        logger.debug(self, 'Found {} unseen mails'.format(
            len(uids),
        ))
        for uid in uids:
            # INFO - G.M - 2017-12-08 - Fetch BODY.PEEK[]
            # Retrieve all mail(body and header) but don't set mail
            # as seen because of PEEK
            # see rfc3501
            logger.debug(self, 'Fetch mail "{}"'.format(
                uid,
            ))
            rv, mail_data = self._connection.fetch(uid, 'BODY.PEEK[]')
            logger.debug(self, 'Response status {}'.format(
                rv,
            ))
            # The mail content is expected as second item of a tuple; any
            # other answer is handled as a failure instead of raising, which
            # would lose the mails already fetched and flagged as seen.
            if rv == 'OK' and mail_data \
                    and isinstance(mail_data[0], tuple):
                msg = message_from_bytes(mail_data[0][1])
                messages.append(MessageContainer(msg, uid))
                self._set_flag(uid, IMAP_SEEN_FLAG)
            else:
                log = 'IMAP : Unable to get mail : {}'
                logger.error(self, log.format(str(rv)))
        return messages

    def _notify_tracim(
            self,
            mails: typing.List['DecodedMail'],
    ) -> None:
        """
        Send http request to tracim endpoint
        Mails answered with 200, 204 or 400 are flagged as checked, the
        seen flag of the other ones is removed.
        :param mails: list of mails to send, emptied by this method
        """
        logger.debug(self, 'Notify tracim about {} new responses'.format(
            len(mails),
        ))
        # TODO BS 20171124: Look around mail.get_from_address(), mail.get_key()
        # , mail.get_body() etc ... for raise InvalidEmailError if missing
        # required informations (actually get_from_address raise IndexError
        # if no from address for example) and catch it here
        while mails:
            mail = mails.pop()
            try:
                msg = {'token': self.token,
                       'user_mail': mail.get_from_address(),
                       'content_id': mail.get_key(),
                       'payload': {
                           'content': mail.get_body(
                               use_html_parsing=self.use_html_parsing,
                               use_txt_parsing=self.use_txt_parsing),
                       }}
            except Exception as e:
                # One mail which can not be decoded must not prevent the
                # other mails, already flagged as seen, from being sent.
                log = 'Fail to decode fetched mail : {}'
                logger.error(self, log.format(str(e)))
                continue
            try:
                logger.debug(
                    self,
                    'Contact API on {} with body {}'.format(
                        self.endpoint,
                        json.dumps(msg),
                    ),
                )
                r = requests.post(self.endpoint, json=msg)
                if r.status_code not in [200, 204]:
                    try:
                        details = r.json().get('msg')
                    except (ValueError, AttributeError):
                        # Error body is not a json object
                        details = r.text
                    log = 'bad status code {} response when sending mail to tracim: {}'  # nopep8
                    logger.error(self, log.format(
                        str(r.status_code),
                        details,
                    ))
                # Flag all correctly checked mail, unseen the others
                if r.status_code in [200, 204, 400]:
                    self._set_flag(mail.uid, IMAP_CHECKED_FLAG)
                else:
                    self._unset_flag(mail.uid, IMAP_SEEN_FLAG)
            # TODO - G.M - Verify exception correctly works
            except requests.exceptions.Timeout as e:
                log = 'Timeout error to transmit fetched mail to tracim : {}'
                logger.error(self, log.format(str(e)))
                self._unset_flag(mail.uid, IMAP_SEEN_FLAG)
            except requests.exceptions.RequestException as e:
                log = 'Fail to transmit fetched mail to tracim : {}'
                logger.error(self, log.format(str(e)))
                self._unset_flag(mail.uid, IMAP_SEEN_FLAG)

    def _store_flag(
            self,
            uid: int,
            command: str,
            verb: str,
            flag: str,
    ) -> None:
        """
        Shared implementation of _set_flag and _unset_flag.
        :param uid: identifier of the mail in the selected mailbox
        :param command: imap store command, '+FLAGS' or '-FLAGS'
        :param verb: 'set' or 'unset', used in log messages
        :param flag: imap flag
        """
        assert uid is not None

        rv, data = self._connection.store(
            uid,
            command,
            flag,
        )
        if rv == 'OK':
            log = 'Message {uid} {verb} as {flag}.'.format(
                uid=uid,
                verb=verb,
                flag=flag)
            logger.debug(self, log)
        else:
            log = 'Can not {verb} Message {uid} as {flag} : {rv}'.format(
                verb=verb,
                uid=uid,
                flag=flag,
                rv=rv)
            logger.error(self, log)

    def _set_flag(
            self,
            uid: int,
            flag: str,
    ) -> None:
        """
        Add a flag to a mail of the selected mailbox, failure is only logged.
        :param uid: identifier of the mail in the selected mailbox
        :param flag: imap flag to add
        """
        self._store_flag(uid, '+FLAGS', 'set', flag)

    def _unset_flag(
            self,
            uid: int,
            flag: str,
    ) -> None:
        """
        Remove a flag from a mail of the selected mailbox, failure is only
        logged.
        :param uid: identifier of the mail in the selected mailbox
        :param flag: imap flag to remove
        """
        self._store_flag(uid, '-FLAGS', 'unset', flag)