|
@@ -0,0 +1,335 @@
|
|
1
|
+# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+import sys
|
|
4
|
+import time
|
|
5
|
+import imaplib
|
|
6
|
+import datetime
|
|
7
|
+import json
|
|
8
|
+import typing
|
|
9
|
+from email.message import Message
|
|
10
|
+from email.header import Header, decode_header, make_header
|
|
11
|
+from email.utils import parseaddr, parsedate_tz, mktime_tz
|
|
12
|
+from email import message_from_bytes
|
|
13
|
+
|
|
14
|
+import markdown
|
|
15
|
+import requests
|
|
16
|
+from bs4 import BeautifulSoup, Tag
|
|
17
|
+from email_reply_parser import EmailReplyParser
|
|
18
|
+
|
|
19
|
+from tracim.lib.base import logger
|
|
20
|
+
|
|
21
|
# Name of the mail header read first when looking for the Tracim key.
TRACIM_SPECIAL_KEY_HEADER = 'X-Tracim-Key'
# TODO BS 20171124: Think about replacing this dict config with an object
# Rules used to sanitize the html body of a mail:
# - tag_blacklist / class_blacklist / id_blacklist: a tag matching one of
#   these lists is extracted (removed) from the document;
# - tag_whitelist: these tags are kept, but lose every attribute which is
#   not listed in attrs_whitelist;
# - any other tag is unwrapped: the tag is dropped, its children are kept.
BEAUTIFULSOUP_HTML_BODY_PARSE_CONFIG = {
    'tag_blacklist': ['script', 'style', 'blockquote'],
    'class_blacklist': ['moz-cite-prefix', 'gmail_extra', 'gmail_quote',
                        'yahoo_quoted'],
    'id_blacklist': ['reply-intro'],
    'tag_whitelist': ['a', 'b', 'strong', 'i', 'br', 'ul', 'li', 'ol',
                      'em', 'u',
                      'thead', 'tr', 'td', 'tbody', 'table', 'p', 'pre'],
    'attrs_whitelist': ['href'],
}
# Content types of the mime parts usable as mail body.
CONTENT_TYPE_TEXT_PLAIN = 'text/plain'
CONTENT_TYPE_TEXT_HTML = 'text/html'
|
|
35
|
+
|
|
36
|
+
|
|
37
|
class DecodedMail(object):
    """
    Wrapper around a parsed ``email.message.Message`` giving access to its
    decoded headers, its sanitized body and the key it carries.
    """

    # Charset used when a body part declares no charset, or an unknown one.
    _DEFAULT_CHARSET = 'iso-8859-1'

    def __init__(self, message: Message) -> None:
        """
        :param message: parsed mail to wrap
        """
        self._message = message

    def _decode_header(self, header_title: str) -> typing.Optional[str]:
        """
        Read a header and decode its encoded words into a plain string.

        :param header_title: name of the header to read
        :return: decoded header value, None if the header is missing
        """
        # FIXME : Handle exception
        if header_title not in self._message:
            return None
        raw_value = self._message[header_title]
        return str(make_header(decode_header(raw_value)))

    def get_subject(self) -> typing.Optional[str]:
        """
        :return: decoded 'subject' header, None if the mail has no subject
        """
        return self._decode_header('subject')

    def get_from_address(self) -> str:
        """
        :return: address part of the 'From' header
        """
        return parseaddr(self._message['From'])[1]

    def get_to_address(self) -> str:
        """
        :return: address part of the 'To' header
        """
        return parseaddr(self._message['To'])[1]

    def get_first_ref(self) -> str:
        """
        :return: first mail-id of the 'References' header without its angle
        brackets, empty string if the header is missing or empty
        """
        references = self._message['References']
        mail_ids = str(references).split() if references else []
        if not mail_ids:
            return ''
        # 'References' holds several whitespace separated mail-ids. Only the
        # first one is given to parseaddr: recent python versions return an
        # empty result when parseaddr receives more than one address.
        return parseaddr(mail_ids[0])[1]

    def get_special_key(self) -> typing.Optional[str]:
        """
        :return: decoded value of the TRACIM_SPECIAL_KEY_HEADER header, None
        if the header is missing
        """
        return self._decode_header(TRACIM_SPECIAL_KEY_HEADER)

    def get_body(self) -> typing.Optional[str]:
        """
        Decode the body part of the mail and sanitize it.

        :return: sanitized html body, None if the mail has no usable body
        """
        body_part = self._get_mime_body_message()
        # Compare with None: a Message without any header is falsy.
        if body_part is None:
            return None

        content_type = body_part.get_content_type()
        if content_type not in (CONTENT_TYPE_TEXT_PLAIN,
                                CONTENT_TYPE_TEXT_HTML):
            return None

        payload = body_part.get_payload(decode=True)
        if payload is None:
            return None

        charset = body_part.get_content_charset(self._DEFAULT_CHARSET)
        try:
            # Undecodable bytes are replaced instead of rejecting the mail.
            decoded_body = payload.decode(charset, errors='replace')
        except LookupError:
            # The charset declared by the mail is unknown to python.
            decoded_body = payload.decode(
                self._DEFAULT_CHARSET,
                errors='replace',
            )

        if content_type == CONTENT_TYPE_TEXT_PLAIN:
            return self._parse_txt_body(decoded_body)
        return self._parse_html_body(decoded_body)

    @classmethod
    def _parse_txt_body(cls, txt_body: str) -> str:
        """
        Keep only the reply of a plain text body, render it as html through
        markdown and sanitize the result.

        :param txt_body: decoded plain text body
        :return: sanitized html body
        """
        txt_body = EmailReplyParser.parse_reply(txt_body)
        html_body = markdown.markdown(txt_body)
        return cls._parse_html_body(html_body)

    @classmethod
    def _parse_html_body(cls, html_body: str) -> str:
        """
        Sanitize an html body according to
        BEAUTIFULSOUP_HTML_BODY_PARSE_CONFIG.

        :param html_body: html to sanitize
        :return: sanitized html
        """
        soup = BeautifulSoup(html_body, 'html.parser')
        config = BEAUTIFULSOUP_HTML_BODY_PARSE_CONFIG
        for tag in soup.findAll():
            if cls._tag_to_extract(tag):
                # Blacklisted: remove the tag from the document.
                tag.extract()
            elif tag.name.lower() in config['tag_whitelist']:
                # Whitelisted: keep the tag, drop unexpected attributes.
                # Iterate over a copy as attrs is modified in the loop.
                for attr in list(tag.attrs):
                    if attr not in config['attrs_whitelist']:
                        del tag.attrs[attr]
            else:
                # Unknown tag: drop the tag itself but keep its children.
                tag.unwrap()
        return str(soup)

    @classmethod
    def _tag_to_extract(cls, tag: 'Tag') -> bool:
        """
        :param tag: tag to check
        :return: True if the name, a class or the id of the tag is
        blacklisted
        """
        config = BEAUTIFULSOUP_HTML_BODY_PARSE_CONFIG
        if tag.name.lower() in config['tag_blacklist']:
            return True
        if 'class' in tag.attrs:
            tag_classes = tag.attrs['class']
            if any(elem in tag_classes for elem in config['class_blacklist']):
                return True
        if 'id' in tag.attrs:
            # NOTE(review): if the parser gives 'id' as a plain string this
            # is a substring test, not an equality test - confirm it is the
            # wanted behavior.
            tag_id = tag.attrs['id']
            if any(elem in tag_id for elem in config['id_blacklist']):
                return True
        return False

    def _get_mime_body_message(self) -> typing.Optional[Message]:
        """
        Find the mime part to use as body: the first html part which is not
        an attachment, otherwise the first plain text part which is not an
        attachment.

        :return: body part, None if the mail has no such part
        """
        # TODO - G.M - 2017-11-16 - Use stdlib msg.get_body feature for py3.6+
        first_plain_part = None
        for part in self._message.walk():
            content_dispo = str(part.get('Content-Disposition'))
            if 'attachment' in content_dispo:
                continue
            content_type = part.get_content_type()
            if content_type == CONTENT_TYPE_TEXT_HTML:
                return part
            if content_type == CONTENT_TYPE_TEXT_PLAIN \
                    and first_plain_part is None:
                first_plain_part = part
        # None when nothing matches: returning any other part would expose
        # an attachment as the mail body.
        return first_plain_part

    def get_key(self) -> typing.Optional[str]:
        """
        The key is the string carried by some mail headers that we need to
        retrieve.
        First try the special header, then the 'to' header
        and finally the first (oldest) mail-id of the 'references' header.

        :return: key, None if no header contains one
        """
        special_key = self.get_special_key()
        if special_key:
            return special_key

        # A 'to' address without key must not hide the key of 'references'.
        for address in (self.get_to_address(), self.get_first_ref()):
            if not address:
                continue
            key = self.find_key_from_mail_address(address)
            if key:
                return key

        return None

    @classmethod
    def find_key_from_mail_address(
        cls,
        mail_address: str,
    ) -> typing.Optional[str]:
        """ Parse mail_address-like string
        to retrieve key.

        :param mail_address: user+key@something like string
        :return: key, None unless the part before '@' contains exactly one
        '+'
        """
        username = mail_address.split('@')[0]
        username_data = username.split('+')
        if len(username_data) == 2:
            return username_data[1]
        return None
|
|
174
|
+
|
|
175
|
+
|
|
176
|
class MailFetcher(object):
    # Seconds to wait for an answer of the tracim endpoint. Without timeout
    # a silent endpoint would block the fetch loop forever.
    HTTP_TIMEOUT = 60

    def __init__(
        self,
        host: str,
        port: str,
        user: str,
        password: str,
        use_ssl: bool,
        folder: str,
        delay: int,
        endpoint: str,
        token: str,
    ) -> None:
        """
        Fetch mail from a mailbox folder through IMAP and add their content to
        Tracim through http according to mail Headers.
        Fetch is regular.
        :param host: imap server hostname
        :param port: imap connection port
        :param user: user login of mailbox
        :param password: user password of mailbox
        :param use_ssl: use imap over ssl connection
        :param folder: mail folder where new mail are fetched
        :param delay: seconds to wait before fetching new mail again
        :param endpoint: tracim http endpoint where decoded mail are sent.
        :param token: token to authenticate http connection
        """
        self._connection = None
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.use_ssl = use_ssl
        self.folder = folder
        self.delay = delay
        self.endpoint = endpoint
        self.token = token

        self._is_active = True

    def run(self) -> None:
        """
        Loop until stop() is called: wait for the delay, connect to the
        mailbox, fetch the new mails, send them to tracim and disconnect.
        Errors of a cycle are logged and the loop goes on.
        """
        while self._is_active:
            time.sleep(self.delay)
            try:
                self._connect()
                messages = self._fetch()
                # TODO - G.M - 2017-11-22 retry sending unsent mail
                # These mails are returned by _notify_tracim, flag them with
                # "unseen" or store them until new _notify_tracim call
                cleaned_mails = [DecodedMail(msg) for msg in messages]
                self._notify_tracim(cleaned_mails)
            except Exception as e:
                # TODO - G.M - 2017-11-23 - Identify possible exceptions
                log = 'IMAP error: {}'
                logger.warning(self, log.format(str(e)))
            finally:
                # Release the connection even when the cycle failed.
                self._disconnect()

    def stop(self) -> None:
        """
        Ask run() to exit before its next cycle.
        """
        self._is_active = False

    def _connect(self) -> None:
        """
        Open a new IMAP connection and log in, after closing any previous
        connection.

        :raise Exception: error raised by the login, after it has been
        logged and the connection released
        """
        # TODO - G.M - 2017-11-15 Verify connection/disconnection
        # Are old connections properly closed this way ?
        if self._connection is not None:
            self._disconnect()
        # TODO - G.M - 2017-11-23 Support for predefined SSLContext ?
        # without ssl_context param, tracim use default security configuration
        # which is great in most case.
        if self.use_ssl:
            self._connection = imaplib.IMAP4_SSL(self.host, self.port)
        else:
            self._connection = imaplib.IMAP4(self.host, self.port)

        try:
            self._connection.login(self.user, self.password)
        except Exception as e:
            log = 'IMAP login error: {}'
            logger.warning(self, log.format(str(e)))
            # Nothing can be fetched without login: drop the connection and
            # let the caller skip this cycle.
            self._disconnect()
            raise

    def _disconnect(self) -> None:
        """
        Close the current IMAP connection, if any. The connection is always
        forgotten, even if the server refuses to close it properly.
        """
        connection = self._connection
        if connection is None:
            return
        try:
            try:
                connection.close()
            except (imaplib.IMAP4.error, OSError):
                # close is refused when no mailbox is selected (login or
                # select failed): logout must be tried anyway.
                pass
            try:
                connection.logout()
            except (imaplib.IMAP4.error, OSError) as e:
                log = 'IMAP logout error: {}'
                logger.debug(self, log.format(str(e)))
        finally:
            # Never keep a dead connection: it would break every new
            # _connect call.
            self._connection = None

    def _fetch(self) -> typing.List[Message]:
        """
        Get new messages from mailbox
        :return: list of new mails
        """
        messages = []
        # select mailbox
        rv, data = self._connection.select(self.folder)
        if rv != 'OK':
            log = 'IMAP : Unable to open mailbox : {}'
            logger.debug(self, log.format(str(rv)))
            return messages

        # get mails
        # TODO - G.M - 2017-11-15 Which files to select as new file ?
        # Unseen file or All file from a directory (old one should be
        # moved/ deleted from mailbox during this process) ?
        rv, data = self._connection.search(None, "(UNSEEN)")
        if rv != 'OK':
            # FIXME : Distinct error from empty mailbox ?
            return messages

        # get mail content
        for num in data[0].split():
            # INFO - G.M - 2017-11-23 - Fetch (RFC822) to retrieve all
            # complete mails see example : https://docs.python.org/fr/3.5/library/imaplib.html#imap4-example . # nopep8
            # Be careful, This method remove also mails from Unseen
            # mails
            rv, fetched = self._connection.fetch(num, '(RFC822)')
            if rv != 'OK':
                log = 'IMAP : Unable to get mail : {}'
                logger.debug(self, log.format(str(rv)))
                continue
            if not fetched or not isinstance(fetched[0], tuple):
                # No mail content in the response: nothing to parse.
                log = 'IMAP : Empty response when fetching mail : {}'
                logger.debug(self, log.format(str(num)))
                continue
            messages.append(message_from_bytes(fetched[0][1]))
        return messages

    def _notify_tracim(
        self,
        mails: typing.List['DecodedMail'],
    ) -> typing.List['DecodedMail']:
        """
        Send http request to tracim endpoint, one per mail, in the order of
        the given list. The given list is not modified.
        Sending stops at the first transmission failure.
        :param mails: list of mails to send
        :return: unsent mails: the mail whose transmission failed and all
        mails following it
        """
        unsended_mails = []
        # TODO BS 20171124: Look around mail.get_from_address(), mail.get_key()
        # , mail.get_body() etc ... for raise InvalidEmailError if missing
        # required information and catch it here
        for index, mail in enumerate(mails):
            try:
                msg = {'token': self.token,
                       'user_mail': mail.get_from_address(),
                       'content_id': mail.get_key(),
                       'payload': {
                           'content': mail.get_body(),
                       }}
            except Exception as e:
                # One malformed mail must not prevent sending the others.
                log = 'Unable to decode fetched mail, mail is skipped : {}'
                logger.error(self, log.format(str(e)))
                continue

            try:
                r = requests.post(
                    self.endpoint,
                    json=msg,
                    timeout=self.HTTP_TIMEOUT,
                )
            # TODO - G.M - Verify exception correctly works
            except requests.exceptions.Timeout as e:
                log = 'Timeout error to transmit fetched mail to tracim : {}'
                logger.error(self, log.format(str(e)))
                unsended_mails.extend(mails[index:])
                break
            except requests.exceptions.RequestException as e:
                log = 'Fail to transmit fetched mail to tracim : {}'
                logger.error(self, log.format(str(e)))
                unsended_mails.extend(mails[index:])
                break

            if r.status_code not in [200, 204]:
                log = 'bad status code response when sending mail to tracim: {}'  # nopep8
                logger.error(self, log.format(str(r.status_code)))

        return unsended_mails
|