|
@@ -18,9 +18,9 @@ from email_reply_parser import EmailReplyParser
|
18
|
18
|
|
19
|
19
|
from tracim.lib.base import logger
|
20
|
20
|
|
21
|
|
-TRACIM_SPECIAL_KEY_HEADER = "X-Tracim-Key"
|
22
|
|
-BS_HTML_BODY_PARSE_CONFIG = {
|
23
|
|
- 'tag_blacklist': ["script", "style", "blockquote"],
|
|
21
|
+TRACIM_SPECIAL_KEY_HEADER = 'X-Tracim-Key'
|
|
22
|
+BEAUTIFULSOUP_HTML_BODY_PARSE_CONFIG = {
|
|
23
|
+ 'tag_blacklist': ['script', 'style', 'blockquote'],
|
24
|
24
|
'class_blacklist': ['moz-cite-prefix', 'gmail_extra', 'gmail_quote',
|
25
|
25
|
'yahoo_quoted'],
|
26
|
26
|
'id_blacklist': ['reply-intro'],
|
|
@@ -29,6 +29,8 @@ BS_HTML_BODY_PARSE_CONFIG = {
|
29
|
29
|
'thead', 'tr', 'td', 'tbody', 'table', 'p', 'pre'],
|
30
|
30
|
'attrs_whitelist': ['href'],
|
31
|
31
|
}
|
|
32
|
+CONTENT_TYPE_TEXT_PLAIN = 'text/plain'
|
|
33
|
+CONTENT_TYPE_TEXT_HTML = 'text/html'
|
32
|
34
|
|
33
|
35
|
|
34
|
36
|
class DecodedMail(object):
|
|
@@ -63,30 +65,30 @@ class DecodedMail(object):
|
63
|
65
|
body = None
|
64
|
66
|
if body_part:
|
65
|
67
|
charset = body_part.get_content_charset('iso-8859-1')
|
66
|
|
- ctype = body_part.get_content_type()
|
67
|
|
- if ctype == "text/plain":
|
|
68
|
+ content_type = body_part.get_content_type()
|
|
69
|
+ if content_type == CONTENT_TYPE_TEXT_PLAIN:
|
68
|
70
|
txt_body = body_part.get_payload(decode=True).decode(
|
69
|
71
|
charset)
|
70
|
72
|
body = DecodedMail._parse_txt_body(txt_body)
|
71
|
73
|
|
72
|
|
- elif ctype == "text/html":
|
|
74
|
+ elif content_type == CONTENT_TYPE_TEXT_HTML:
|
73
|
75
|
html_body = body_part.get_payload(decode=True).decode(
|
74
|
76
|
charset)
|
75
|
77
|
body = DecodedMail._parse_html_body(html_body)
|
76
|
78
|
|
77
|
79
|
return body
|
78
|
80
|
|
79
|
|
- @staticmethod
|
80
|
|
- def _parse_txt_body(txt_body: str):
|
|
81
|
+ @classmethod
|
|
82
|
+ def _parse_txt_body(cls, txt_body: str):
|
81
|
83
|
txt_body = EmailReplyParser.parse_reply(txt_body)
|
82
|
84
|
html_body = markdown.markdown(txt_body)
|
83
|
85
|
body = DecodedMail._parse_html_body(html_body)
|
84
|
86
|
return body
|
85
|
87
|
|
86
|
|
- @staticmethod
|
87
|
|
- def _parse_html_body(html_body: str):
|
|
88
|
+ @classmethod
|
|
89
|
+ def _parse_html_body(cls, html_body: str):
|
88
|
90
|
soup = BeautifulSoup(html_body)
|
89
|
|
- config = BS_HTML_BODY_PARSE_CONFIG
|
|
91
|
+ config = BEAUTIFULSOUP_HTML_BODY_PARSE_CONFIG
|
90
|
92
|
for tag in soup.findAll():
|
91
|
93
|
if DecodedMail._tag_to_extract(tag):
|
92
|
94
|
tag.extract()
|
|
@@ -99,9 +101,9 @@ class DecodedMail(object):
|
99
|
101
|
tag.unwrap()
|
100
|
102
|
return str(soup)
|
101
|
103
|
|
102
|
|
- @staticmethod
|
103
|
|
- def _tag_to_extract(tag) -> bool:
|
104
|
|
- config = BS_HTML_BODY_PARSE_CONFIG
|
|
104
|
+ @classmethod
|
|
105
|
+ def _tag_to_extract(cls, tag) -> bool:
|
|
106
|
+ config = BEAUTIFULSOUP_HTML_BODY_PARSE_CONFIG
|
105
|
107
|
if tag.name.lower() in config['tag_blacklist']:
|
106
|
108
|
return True
|
107
|
109
|
if 'class' in tag.attrs:
|
|
@@ -114,22 +116,23 @@ class DecodedMail(object):
|
114
|
116
|
return True
|
115
|
117
|
return False
|
116
|
118
|
|
117
|
|
-
|
118
|
119
|
def _get_mime_body_message(self) -> typing.Optional[Message]:
|
119
|
120
|
# FIXME - G.M - 2017-11-16 - Use stdlib msg.get_body feature for py3.6+
|
120
|
121
|
# FIXME - G.M - 2017-11-16 - Check support for non-multipart mail
|
121
|
122
|
part = None
|
122
|
123
|
# Check for html
|
123
|
124
|
for part in self._message.walk():
|
124
|
|
- ctype = part.get_content_type()
|
125
|
|
- cdispo = str(part.get('Content-Disposition'))
|
126
|
|
- if ctype == 'text/html' and 'attachment' not in cdispo:
|
|
125
|
+ content_type = part.get_content_type()
|
|
126
|
+ content_dispo = str(part.get('Content-Disposition'))
|
|
127
|
+ if content_type == CONTENT_TYPE_TEXT_HTML \
|
|
128
|
+ and 'attachment' not in content_dispo:
|
127
|
129
|
return part
|
128
|
130
|
# check for plain text
|
129
|
131
|
for part in self._message.walk():
|
130
|
|
- ctype = part.get_content_type()
|
131
|
|
- cdispo = str(part.get('Content-Disposition'))
|
132
|
|
- if ctype == 'text/plain' and 'attachment' not in cdispo:
|
|
132
|
+ content_type = part.get_content_type()
|
|
133
|
+ content_dispo = str(part.get('Content-Disposition'))
|
|
134
|
+ if content_type == CONTENT_TYPE_TEXT_PLAIN and 'attachment' \
|
|
135
|
+ not in content_dispo:
|
133
|
136
|
return part
|
134
|
137
|
return part
|
135
|
138
|
|
|
@@ -153,8 +156,8 @@ class DecodedMail(object):
|
153
|
156
|
|
154
|
157
|
return key
|
155
|
158
|
|
156
|
|
- @staticmethod
|
157
|
|
- def find_key_from_mail_address(mail_address: str) \
|
|
159
|
+ @classmethod
|
|
160
|
+ def find_key_from_mail_address(cls, mail_address: str) \
|
158
|
161
|
-> typing.Optional[str]:
|
159
|
162
|
""" Parse mail_address-like string
|
160
|
163
|
to retrieve key.
|
|
@@ -174,8 +177,14 @@ class DecodedMail(object):
|
174
|
177
|
class MailFetcher(object):
|
175
|
178
|
|
176
|
179
|
def __init__(self,
|
177
|
|
- host: str, port: str, user: str, password: str, folder: str,
|
178
|
|
- delay: int, endpoint: str, token:str) \
|
|
180
|
+ host: str,
|
|
181
|
+ port: str,
|
|
182
|
+ user: str,
|
|
183
|
+ password: str,
|
|
184
|
+ folder: str,
|
|
185
|
+ delay: int,
|
|
186
|
+ endpoint: str,
|
|
187
|
+ token: str) \
|
179
|
188
|
-> None:
|
180
|
189
|
"""
|
181
|
190
|
Fetch mail from a mailbox folder through IMAP and add their content to
|
|
@@ -269,11 +278,11 @@ class MailFetcher(object):
|
269
|
278
|
unsended_mail = []
|
270
|
279
|
while self._mails:
|
271
|
280
|
mail = self._mails.pop()
|
272
|
|
- msg = {"token": self.token,
|
273
|
|
- "user_mail": mail.get_from_address(),
|
274
|
|
- "content_id": mail.get_key(),
|
275
|
|
- "payload": {
|
276
|
|
- "content": mail.get_body(),
|
|
281
|
+ msg = {'token': self.token,
|
|
282
|
+ 'user_mail': mail.get_from_address(),
|
|
283
|
+ 'content_id': mail.get_key(),
|
|
284
|
+ 'payload': {
|
|
285
|
+ 'content': mail.get_body(),
|
277
|
286
|
}}
|
278
|
287
|
try:
|
279
|
288
|
r = requests.post(self.endpoint, json=msg)
|