Browse Source

Add Html Mail Parser able to detect Quote and Signature elements and order

Guénaël Muller 6 years ago
parent
commit
2a07115b40

+ 347 - 0
tracim/tracim/lib/email_body_parser.py View File

@@ -0,0 +1,347 @@
1
+import typing
2
+
3
+from bs4 import BeautifulSoup
4
+from bs4 import Tag
5
+from bs4 import NavigableString
6
+
7
+
8
+class BodyMailPartType(object):
9
+    Signature = 'sign'
10
+    Main = 'main'
11
+    Quote = 'quote'
12
+
13
+
14
+class BodyMailPart(object):
15
+    def __init__(
16
+            self,
17
+            text: str,
18
+            part_type: str
19
+    )-> None:
20
+        self.text = text
21
+        self.part_type = part_type
22
+
23
+
24
+class BodyMailParts(object):
25
+    """
26
+    Data structure splitting a mail body into a "list" of BodyMailPart.
27
+    When 2 similar BodyMailPart (same part_type) are added one after the other,
28
+    it doesn't create a new part; it just merges those elements into one.
29
+    It should always have only one Signature type part, at the end of the body.
30
+    This object provides no setter other than append() in order to
31
+    preserve object coherence.
32
+    """
33
+    def __init__(self) -> None:
34
+        self._list = []  # type: typing.List[BodyMailPart]
35
+
36
+    def __len__(self) -> int:
37
+        return len(self._list)
38
+
39
+    def __getitem__(self, index) -> BodyMailPart:
40
+        return self._list[index]
41
+
42
+    def __delitem__(self, index) -> None:
43
+        del self._list[index]
44
+        # TODO: check consistency
45
+
46
+    def append(self, value) -> None:
47
+        BodyMailParts._check_value(value)
48
+        self._check_sign_last_elem(value)
49
+        self._append(value)
50
+
51
+    def _append(self, value):
52
+        if len(self._list) > 0 and self._list[-1].part_type == value.part_type:
53
+            self._list[-1].text += value.text
54
+        else:
55
+            self._list.append(value)
56
+
57
+    @classmethod
58
+    def _check_value(cls, value) -> None:
59
+        if not isinstance(value, BodyMailPart):
60
+            raise TypeError()
61
+
62
+    def _check_sign_last_elem(self, value: BodyMailPart) -> None:
63
+        """
64
+        Check if last elem is a signature, if true, refuse to add a
65
+        non-signature item.
66
+        :param value: BodyMailPart to check
67
+        :return: None
68
+        """
69
+        if len(self._list) > 0:
70
+            if self._list[-1].part_type == BodyMailPartType.Signature and \
71
+                            value.part_type != BodyMailPartType.Signature:
72
+                raise SignatureIndexError(
73
+                    "Can't add element after signature element.")
74
+
75
+    def disable_signature(self) -> None:
76
+        """
77
+        Treat the trailing signature part as a normal main content part.
78
+        :return: None
79
+        """
80
+        if (
81
+            len(self._list) > 0 and
82
+            self._list[-1].part_type == BodyMailPartType.Signature
83
+        ):
84
+            self._list[-1].part_type = BodyMailPartType.Main
85
+            # If possible, concatenate with previous elem
86
+            if (
87
+                len(self._list) > 1 and
88
+                self._list[-2].part_type == BodyMailPartType.Main
89
+            ):
90
+                self._list[-2].text += self._list[-1].text
91
+                del self._list[-1]
92
+
93
+    def drop_part_type(self, part_type: str) -> None:
94
+        """
95
+        Drop all elements of one part_type
96
+        :param part_type: part_type to completely remove
97
+        :return: None
98
+        """
99
+        new_list = [x for x in self._list if x.part_type != part_type]
100
+        self._list = []
101
+        # INFO - G.M - 2017-11-27 - use append() to have a consistent list
102
+        for elem in new_list:
103
+            self.append(elem)
104
+
105
+    def get_nb_part_type(self, part_type: str) -> int:
106
+        """
107
+        Get number of elements of one part_type
108
+        :param part_type: part_type to check
109
+        :return: number of part_type elements
110
+        """
111
+        count = 0
112
+        for elem in self._list:
113
+            if elem.part_type == part_type:
114
+                count += 1
115
+        return count
116
+
117
+    def __str__(self) -> str:
118
+        s_mail = ''
119
+        for elem in self._list:
120
+            s_mail += elem.text
121
+        return str(s_mail)
122
+
123
+
124
+class SignatureIndexError(Exception):
125
+    pass
126
+
127
+
128
+class HtmlChecker(object):
129
+
130
+    @classmethod
131
+    def _has_attr_value(
132
+            cls,
133
+            elem: typing.Union[Tag, NavigableString],
134
+            attribute_name: str,
135
+            attribute_value: str,
136
+    )-> bool:
137
+        if isinstance(elem, Tag) and \
138
+                        attribute_name in elem.attrs and \
139
+                        attribute_value in elem.attrs[attribute_name]:
140
+            return True
141
+        return False
142
+
143
+
144
+class HtmlMailQuoteChecker(HtmlChecker):
145
+
146
+    @classmethod
147
+    def is_quote(
148
+            cls,
149
+            elem: typing.Union[Tag, NavigableString]
150
+    ) -> bool:
151
+        return cls._is_standard_quote(elem) \
152
+               or cls._is_thunderbird_quote(elem) \
153
+               or cls._is_gmail_quote(elem) \
154
+               or cls._is_yahoo_quote(elem) \
155
+               or cls._is_roundcube_quote(elem)
156
+
157
+    @classmethod
158
+    def _is_standard_quote(
159
+            cls,
160
+            elem: typing.Union[Tag, NavigableString]
161
+    ) -> bool:
162
+        if isinstance(elem, Tag) \
163
+                and elem.name.lower() == 'blockquote':
164
+            return True
165
+        return False
166
+
167
+    @classmethod
168
+    def _is_thunderbird_quote(
169
+            cls,
170
+            elem: typing.Union[Tag, NavigableString]
171
+    ) -> bool:
172
+        return cls._has_attr_value(elem, 'class', 'moz-cite-prefix')
173
+
174
+    @classmethod
175
+    def _is_gmail_quote(
176
+            cls,
177
+            elem: typing.Union[Tag, NavigableString]
178
+    ) -> bool:
179
+        if cls._has_attr_value(elem, 'class', 'gmail_extra'):
180
+            for child in elem.children:
181
+                if cls._has_attr_value(child, 'class', 'gmail_quote'):
182
+                    return True
183
+        return False
184
+
185
+    @classmethod
186
+    def _is_yahoo_quote(
187
+            cls,
188
+            elem: typing.Union[Tag, NavigableString]
189
+    ) -> bool:
190
+        return cls._has_attr_value(elem, 'class', 'yahoo_quoted')
191
+
192
+    @classmethod
193
+    def _is_roundcube_quote(
194
+            cls,
195
+            elem: typing.Union[Tag, NavigableString]
196
+    ) -> bool:
197
+        return cls._has_attr_value(elem, 'id', 'reply-intro')
198
+
199
+
200
+class HtmlMailSignatureChecker(HtmlChecker):
201
+
202
+    @classmethod
203
+    def is_signature(
204
+            cls,
205
+            elem: typing.Union[Tag, NavigableString]
206
+    ) -> bool:
207
+        return cls._is_thunderbird_signature(elem) \
208
+               or cls._is_gmail_signature(elem)
209
+
210
+    @classmethod
211
+    def _is_thunderbird_signature(
212
+            cls,
213
+            elem: typing.Union[Tag, NavigableString]
214
+    ) -> bool:
215
+        return cls._has_attr_value(elem,
216
+                                   'class',
217
+                                   'moz-signature')
218
+
219
+    @classmethod
220
+    def _is_gmail_signature(
221
+            cls,
222
+            elem: typing.Union[Tag, NavigableString]
223
+    ) -> bool:
224
+        if cls._has_attr_value(elem, 'class', 'gmail_signature'):
225
+            return True
226
+        if cls._has_attr_value(elem, 'class', 'gmail_extra'):
227
+            for child in elem.children:
228
+                if cls._has_attr_value(child, 'class', 'gmail_signature'):
229
+                    return True
230
+        if isinstance(elem,Tag) and elem.name.lower() == 'div':
231
+            for child in elem.children:
232
+                if cls._has_attr_value(child, 'class', 'gmail_signature'):
233
+                    return True
234
+        return False
235
+
236
+
237
+class ParsedHTMLMail(object):
238
+    """
239
+    Parse an HTML mail according to some rules.
239
+    Split the HTML mail body into distinct parts using a BodyMailParts
240
+    object, then apply the processing rules to them.
242
+    """
243
+
244
+    def __init__(self, html_body: str):
245
+        self.src_html_body = html_body
246
+
247
+    def __str__(self):
248
+        return str(self._parse_mail())
249
+
250
+    def get_elements(self):
251
+        tree = self._make_sanitized_tree()
252
+        return self._distinct_elements(tree)
253
+
254
+    def _parse_mail(self) -> BodyMailParts:
255
+        elements = self.get_elements()
256
+        elements = self._process_elements(elements)
257
+        return elements
258
+
259
+    def _make_sanitized_tree(self):
260
+        """
261
+        Get only html body content and remove some unneeded elements
262
+        :return:
263
+        """
264
+        tree = BeautifulSoup(self.src_html_body, 'html.parser')
265
+
266
+        # Only parse body part of html if available
267
+        subtree = tree.find('body')
268
+        if subtree:
269
+            tree = BeautifulSoup(str(subtree), 'html.parser')
270
+
271
+        # if some sort of "meta_div", unwrap it
272
+        while len(tree.findAll(recursive=None)) == 1 and \
273
+                tree.find().name.lower() in ['body', 'div']:
274
+            tree.find().unwrap()
275
+
276
+        # drop some not useful html elem
277
+        for tag in tree.findAll():
278
+            if tag.name.lower() in ['br']:
279
+                tag.unwrap()
280
+                continue
281
+            if tag.name.lower() in ['script', 'style']:
282
+                tag.extract()
283
+
284
+        return tree
285
+
286
+    @classmethod
287
+    def _distinct_elements(cls, tree: BeautifulSoup) -> BodyMailParts:
288
+        elements = BodyMailParts()
289
+        for tag in list(tree):
290
+            txt = str(tag)
291
+            part_type = BodyMailPartType.Main
292
+            if isinstance(tag, NavigableString):
293
+                txt = tag.replace('\n', '').strip()
294
+            if not txt:
295
+                continue
296
+            if HtmlMailQuoteChecker.is_quote(tag):
297
+                part_type = BodyMailPartType.Quote
298
+            elif HtmlMailSignatureChecker.is_signature(tag):
299
+                part_type = BodyMailPartType.Signature
300
+            element = BodyMailPart(txt, part_type)
301
+            try:
302
+                elements.append(element)
303
+            except SignatureIndexError:
304
+                elements.disable_signature()
305
+                elements.append(element)
306
+        return elements
307
+
308
+    @classmethod
309
+    def _process_elements(cls, elements: BodyMailParts) -> BodyMailParts:
310
+        if len(elements) >= 2:
311
+            # Case 1 and 2, only one main and one quote
312
+            if elements.get_nb_part_type('main') == 1 and \
313
+                            elements.get_nb_part_type('quote') == 1:
314
+                # Case 1 : Main first
315
+                if elements[0].part_type == BodyMailPartType.Main:
316
+                    cls._process_main_first_case(elements)
317
+                # Case 2 : Quote first
318
+                if elements[0].part_type == BodyMailPartType.Quote:
319
+                    cls._process_quote_first_case(elements)
320
+            else:
321
+                # Case 3 : Multiple quotes and/or main
322
+                cls._process_multiples_elems_case(elements)
323
+        else:
324
+            cls._process_default_case(elements)
325
+            # default case (only one element or empty list)
326
+        return elements
327
+
328
+    @classmethod
329
+    def _process_quote_first_case(cls, elements: BodyMailParts):
330
+        elements.drop_part_type(BodyMailPartType.Signature)
331
+        pass
332
+
333
+    @classmethod
334
+    def _process_main_first_case(cls, elements: BodyMailParts):
335
+        elements.drop_part_type(BodyMailPartType.Quote)
336
+        elements.drop_part_type(BodyMailPartType.Signature)
337
+        pass
338
+
339
+    @classmethod
340
+    def _process_multiples_elems_case(cls, elements: BodyMailParts):
341
+        elements.drop_part_type(BodyMailPartType.Signature)
342
+        pass
343
+
344
+    @classmethod
345
+    def _process_default_case(cls, elements: BodyMailParts):
346
+        elements.drop_part_type(BodyMailPartType.Quote)
347
+        elements.drop_part_type(BodyMailPartType.Signature)

+ 11 - 10
tracim/tracim/lib/email_fetcher.py View File

@@ -17,16 +17,16 @@ from bs4 import BeautifulSoup, Tag
17 17
 from email_reply_parser import EmailReplyParser
18 18
 
19 19
 from tracim.lib.base import logger
20
+from tracim.lib.email_body_parser import ParsedHTMLMail
20 21
 
21 22
 TRACIM_SPECIAL_KEY_HEADER = 'X-Tracim-Key'
22 23
 # TODO BS 20171124: Think about replacing this dict config with an object
23
-BEAUTIFULSOUP_HTML_BODY_PARSE_CONFIG = {
24
-    'tag_blacklist': ['script', 'style', 'blockquote'],
25
-    'class_blacklist': ['moz-cite-prefix', 'gmail_extra', 'gmail_quote',
26
-                        'yahoo_quoted'],
24
+BEAUTIFULSOUP_HTML_BODY_SANITIZE_CONFIG = {
25
+    'tag_blacklist': ['script', 'style'],
26
+    'class_blacklist': [],
27 27
     'id_blacklist': ['reply-intro'],
28 28
     'tag_whitelist': ['a', 'b', 'strong', 'i', 'br', 'ul', 'li', 'ol',
29
-                      'em', 'i', 'u',
29
+                      'em', 'i', 'u', 'blockquote', 'h1','h2','h3','h4',
30 30
                       'thead', 'tr', 'td', 'tbody', 'table', 'p', 'pre'],
31 31
     'attrs_whitelist': ['href'],
32 32
 }
@@ -74,7 +74,8 @@ class DecodedMail(object):
74 74
             elif content_type == CONTENT_TYPE_TEXT_HTML:
75 75
                 html_body = body_part.get_payload(decode=True).decode(
76 76
                     charset)
77
-                body = DecodedMail._parse_html_body(html_body)
77
+                html_body = str(ParsedHTMLMail(html_body))
78
+                body = DecodedMail._sanitize_html_body(html_body)
78 79
 
79 80
         return body
80 81
 
@@ -82,13 +83,13 @@ class DecodedMail(object):
82 83
     def _parse_txt_body(cls, txt_body: str) -> str:
83 84
         txt_body = EmailReplyParser.parse_reply(txt_body)
84 85
         html_body = markdown.markdown(txt_body)
85
-        body = DecodedMail._parse_html_body(html_body)
86
+        body = DecodedMail._sanitize_html_body(html_body)
86 87
         return body
87 88
 
88 89
     @classmethod
89
-    def _parse_html_body(cls, html_body: str) -> str:
90
+    def _sanitize_html_body(cls, html_body: str) -> str:
90 91
         soup = BeautifulSoup(html_body, 'html.parser')
91
-        config = BEAUTIFULSOUP_HTML_BODY_PARSE_CONFIG
92
+        config = BEAUTIFULSOUP_HTML_BODY_SANITIZE_CONFIG
92 93
         for tag in soup.findAll():
93 94
             if DecodedMail._tag_to_extract(tag):
94 95
                 tag.extract()
@@ -103,7 +104,7 @@ class DecodedMail(object):
103 104
 
104 105
     @classmethod
105 106
     def _tag_to_extract(cls, tag: Tag) -> bool:
106
-        config = BEAUTIFULSOUP_HTML_BODY_PARSE_CONFIG
107
+        config = BEAUTIFULSOUP_HTML_BODY_SANITIZE_CONFIG
107 108
         if tag.name.lower() in config['tag_blacklist']:
108 109
             return True
109 110
         if 'class' in tag.attrs:

+ 582 - 0
tracim/tracim/tests/library/test_email_body_parser.py View File

@@ -0,0 +1,582 @@
1
+from tracim.lib.email_body_parser import HtmlMailQuoteChecker
2
+from tracim.lib.email_body_parser import HtmlMailSignatureChecker
3
+from tracim.lib.email_body_parser import BodyMailParts
4
+from tracim.lib.email_body_parser import BodyMailPart
5
+from tracim.lib.email_body_parser import BodyMailPartType
6
+from tracim.lib.email_body_parser import SignatureIndexError
7
+from tracim.lib.email_body_parser import ParsedHTMLMail
8
+from tracim.tests import TestStandard
9
+from bs4 import BeautifulSoup,Tag
10
+from nose.tools import raises
11
+
12
+
13
+class TestHtmlMailQuoteChecker(TestStandard):
14
+    def test_unit__is_standard_quote_ok(self):
15
+        soup = BeautifulSoup('<blockquote></blockquote>', 'html.parser')
16
+        main_elem = soup.find()
17
+        assert HtmlMailQuoteChecker._is_standard_quote(main_elem) is True
18
+
19
+    def test_unit__is_standard_quote_no(self):
20
+        soup = BeautifulSoup('<a></a>')
21
+        main_elem = soup.find()
22
+        assert HtmlMailQuoteChecker._is_standard_quote(main_elem) is False
23
+
24
+    def test_unit__is_thunderbird_quote_ok(self):
25
+        soup = BeautifulSoup('<div class="moz-cite-prefix"></div>',
26
+                             'html.parser')
27
+        main_elem = soup.find()
28
+        assert HtmlMailQuoteChecker._is_thunderbird_quote(main_elem) is True
29
+
30
+    def test_unit__is_thunderbird_quote_no(self):
31
+        soup = BeautifulSoup('<div class="nothing"></div>')
32
+        main_elem = soup.find()
33
+        assert HtmlMailQuoteChecker._is_thunderbird_quote(main_elem) is False
34
+
35
+    def test_unit__is_gmail_quote_ok(self):
36
+        html = '<div class="gmail_extra">' + \
37
+              '<a></a><div class="gmail_quote"></div>' + \
38
+              '</div>'
39
+        soup = BeautifulSoup(html, 'html.parser')
40
+        main_elem = soup.find()
41
+        assert HtmlMailQuoteChecker._is_gmail_quote(main_elem) is True
42
+
43
+    def test_unit__is_gmail_quote_no(self):
44
+        soup = BeautifulSoup('<div class="nothing"></div>', 'html.parser')
45
+        main_elem = soup.find()
46
+        assert HtmlMailQuoteChecker._is_gmail_quote(main_elem) is False
47
+
48
+    def test_unit__is_gmail_quote_no_2(self):
49
+        html = '<div class="gmail_extra">' + \
50
+              '<a></a><div class="gmail_signature"></div>' + \
51
+              '</div>'
52
+        soup = BeautifulSoup(html, 'html.parser')
53
+        main_elem = soup.find()
54
+        assert HtmlMailQuoteChecker._is_gmail_quote(main_elem) is False
55
+
56
+    # TODO - G.M - 2017-11-24 - Check Yahoo and New roundcube html mail with
57
+    # correct mail example
58
+
59
+
60
+class TestHtmlMailSignatureChecker(TestStandard):
61
+    def test_unit__is_thunderbird_signature_ok(self):
62
+        soup = BeautifulSoup('<div class="moz-signature"></div>', 'html.parser')
63
+        main_elem = soup.find()
64
+        assert HtmlMailSignatureChecker._is_thunderbird_signature(main_elem) is True  # nopep8
65
+
66
+    def test_unit__is_thunderbird_signature_no(self):
67
+        soup = BeautifulSoup('<div class="other"></div>', 'html.parser')
68
+        main_elem = soup.find()
69
+        assert HtmlMailSignatureChecker._is_thunderbird_signature(main_elem) is False  # nopep8
70
+
71
+    def test_unit__is_gmail_signature_ok(self):
72
+        html = '<div class="gmail_extra">' + \
73
+               '<a></a><div class="gmail_quote"></div>' + \
74
+               '</div>'
75
+        soup = BeautifulSoup(html, 'html.parser')
76
+        main_elem = soup.find()
77
+        assert HtmlMailSignatureChecker._is_gmail_signature(main_elem) is False
78
+
79
+    def test_unit__is_gmail_signature_no(self):
80
+        soup = BeautifulSoup('<div class="nothing"></div>', 'html.parser')
81
+        main_elem = soup.find()
82
+        assert HtmlMailSignatureChecker._is_gmail_signature(main_elem) is False
83
+
84
+    def test_unit__is_gmail_signature_yes(self):
85
+        html = '<div class="gmail_extra">' + \
86
+               '<a></a><div class="gmail_signature"></div>' + \
87
+               '</div>'
88
+        soup = BeautifulSoup(html, 'html.parser')
89
+        main_elem = soup.find()
90
+        assert HtmlMailSignatureChecker._is_gmail_signature(main_elem) is True
91
+
92
+    def test_unit__is_gmail_signature_yes_2(self):
93
+        html = '<div class="gmail_signature">' + \
94
+               '</div>'
95
+        soup = BeautifulSoup(html, 'html.parser')
96
+        main_elem = soup.find()
97
+        assert HtmlMailSignatureChecker._is_gmail_signature(main_elem) is True
98
+
99
+class TestBodyMailsParts(TestStandard):
100
+
101
+    def test_unit__std_list_methods(self):
102
+        mail_parts = BodyMailParts()
103
+        assert len(mail_parts) == 0
104
+        a = BodyMailPart('a', BodyMailPartType.Main)
105
+        mail_parts._list.append(a)
106
+        assert len(mail_parts) == 1
107
+        assert mail_parts[0] == a
108
+        del mail_parts[0]
109
+        assert len(mail_parts) == 0
110
+
111
+    def test_unit__append_same_type(self):
112
+        mail_parts = BodyMailParts()
113
+        a = BodyMailPart('a', BodyMailPartType.Main)
114
+        mail_parts._append(a)
115
+        b = BodyMailPart('b', BodyMailPartType.Main)
116
+        mail_parts._append(b)
117
+        assert len(mail_parts) == 1
118
+        assert mail_parts[0].part_type == BodyMailPartType.Main
119
+        assert mail_parts[0].text == 'ab'
120
+
121
+    def test_unit__append_different_type(self):
122
+        mail_parts = BodyMailParts()
123
+        a = BodyMailPart('a', BodyMailPartType.Main)
124
+        mail_parts.append(a)
125
+        b = BodyMailPart('b', BodyMailPartType.Quote)
126
+        mail_parts._append(b)
127
+        assert len(mail_parts) == 2
128
+        assert mail_parts[0] == a
129
+        assert mail_parts[1] == b
130
+
131
+    @raises(TypeError)
132
+    def test_unit__check_value__type_error(self):
133
+        mail_parts = BodyMailParts()
134
+        mail_parts._check_value('a')
135
+
136
+    def test_unit__check_value__ok(self):
137
+        mail_parts = BodyMailParts()
138
+        a = BodyMailPart('a', BodyMailPartType.Main)
139
+        mail_parts._check_value(a)
140
+
141
+    @raises(SignatureIndexError)
142
+    def test_unit__check_sign_last_elem_check_main_after_sign(self):
143
+        mail_parts = BodyMailParts()
144
+        a = BodyMailPart('a', BodyMailPartType.Main)
145
+        mail_parts._list.append(a)
146
+        b = BodyMailPart('b', BodyMailPartType.Signature)
147
+        mail_parts._list.append(b)
148
+        c = BodyMailPart('c', BodyMailPartType.Main)
149
+        mail_parts._check_sign_last_elem(c)
150
+
151
+    def test_unit__check_sign_last_elem_check_sign_after_sign(self):
152
+        mail_parts = BodyMailParts()
153
+        a = BodyMailPart('a', BodyMailPartType.Main)
154
+        mail_parts._list.append(a)
155
+        b = BodyMailPart('b', BodyMailPartType.Signature)
156
+        mail_parts._list.append(b)
157
+        c = BodyMailPart('c', BodyMailPartType.Signature)
158
+        mail_parts._check_sign_last_elem(c)
159
+
160
+    def test_unit__disable_signature_no_sign(self):
161
+        mail_parts = BodyMailParts()
162
+        a = BodyMailPart('a', BodyMailPartType.Main)
163
+        mail_parts._list.append(a)
164
+        b = BodyMailPart('b', BodyMailPartType.Quote)
165
+        mail_parts._list.append(b)
166
+        mail_parts.disable_signature()
167
+        assert mail_parts[1] == b
168
+
169
+    def test_unit__disable_signature_sign_quote_as_previous_elem(self):
170
+        mail_parts = BodyMailParts()
171
+        a = BodyMailPart('a', BodyMailPartType.Main)
172
+        mail_parts._list.append(a)
173
+        b = BodyMailPart('b', BodyMailPartType.Quote)
174
+        mail_parts._list.append(b)
175
+        c = BodyMailPart('c', BodyMailPartType.Signature)
176
+        mail_parts._list.append(c)
177
+        mail_parts.disable_signature()
178
+        assert len(mail_parts) == 3
179
+        assert mail_parts[2].text == 'c'
180
+        assert mail_parts[2].part_type == BodyMailPartType.Main
181
+
182
+    def test_unit__disable_signature_sign_main_as_previous_elem(self):
183
+        mail_parts = BodyMailParts()
184
+        a = BodyMailPart('a', BodyMailPartType.Quote)
185
+        mail_parts._list.append(a)
186
+        b = BodyMailPart('b', BodyMailPartType.Main)
187
+        mail_parts._list.append(b)
188
+        c = BodyMailPart('c', BodyMailPartType.Signature)
189
+        mail_parts._list.append(c)
190
+        mail_parts.disable_signature()
191
+        assert len(mail_parts) == 2
192
+        assert mail_parts[1].text == 'bc'
193
+        assert mail_parts[1].part_type == BodyMailPartType.Main
194
+
195
+    def test_unit__drop_part_type(self):
196
+        mail_parts = BodyMailParts()
197
+        a = BodyMailPart('a', BodyMailPartType.Main)
198
+        mail_parts._list.append(a)
199
+        b = BodyMailPart('b', BodyMailPartType.Quote)
200
+        mail_parts._list.append(b)
201
+        c = BodyMailPart('c', BodyMailPartType.Signature)
202
+        mail_parts._list.append(c)
203
+        mail_parts.drop_part_type(BodyMailPartType.Quote)
204
+        assert len(mail_parts) == 2
205
+        assert mail_parts[0].text == 'a'
206
+        assert mail_parts[0].part_type == BodyMailPartType.Main
207
+        assert len(mail_parts) == 2
208
+        assert mail_parts[1].text == 'c'
209
+        assert mail_parts[1].part_type == BodyMailPartType.Signature
210
+
211
+    def test_unit__drop_part_type_consistence(self):
212
+        mail_parts = BodyMailParts()
213
+        a = BodyMailPart('a', BodyMailPartType.Main)
214
+        mail_parts._list.append(a)
215
+        b = BodyMailPart('b', BodyMailPartType.Quote)
216
+        mail_parts._list.append(b)
217
+        c = BodyMailPart('c', BodyMailPartType.Main)
218
+        mail_parts._list.append(c)
219
+        mail_parts.drop_part_type(BodyMailPartType.Quote)
220
+        assert len(mail_parts) == 1
221
+        assert mail_parts[0].text == 'ac'
222
+        assert mail_parts[0].part_type == BodyMailPartType.Main
223
+
224
+    def test_unit__get_nb_part_type(self):
225
+        mail_parts = BodyMailParts()
226
+        assert mail_parts.get_nb_part_type(BodyMailPartType.Main) == 0
227
+        assert mail_parts.get_nb_part_type(BodyMailPartType.Quote) == 0
228
+        assert mail_parts.get_nb_part_type(BodyMailPartType.Signature) == 0
229
+        a = BodyMailPart('a', BodyMailPartType.Main)
230
+        mail_parts._list.append(a)
231
+        assert mail_parts.get_nb_part_type(BodyMailPartType.Main) == 1
232
+        b = BodyMailPart('b', BodyMailPartType.Quote)
233
+        mail_parts._list.append(b)
234
+        assert mail_parts.get_nb_part_type(BodyMailPartType.Quote) == 1
235
+        c = BodyMailPart('c', BodyMailPartType.Signature)
236
+        mail_parts._list.append(c)
237
+        assert mail_parts.get_nb_part_type(BodyMailPartType.Main) == 1
238
+        assert mail_parts.get_nb_part_type(BodyMailPartType.Quote) == 1
239
+        assert mail_parts.get_nb_part_type(BodyMailPartType.Signature) == 1
240
+
241
+    def test_unit__str(self):
242
+        mail_parts = BodyMailParts()
243
+        a = BodyMailPart('a', BodyMailPartType.Main)
244
+        mail_parts._list.append(a)
245
+        b = BodyMailPart('b', BodyMailPartType.Quote)
246
+        mail_parts._list.append(b)
247
+        c = BodyMailPart('c', BodyMailPartType.Signature)
248
+        mail_parts._list.append(c)
249
+        assert str(mail_parts) == 'abc'
250
+
251
+
252
+class TestParsedMail(TestStandard):
253
+
254
+    def test_other__check_gmail_mail(self):
255
+        text_only = '''<div dir="ltr">Voici le texte<br></div>'''
256
+        mail = ParsedHTMLMail(text_only)
257
+        elements = mail.get_elements()
258
+        assert len(elements) == 1
259
+        assert elements[0].part_type == BodyMailPartType.Main
260
+
261
+        text_and_signature = '''
262
+        <div dir="ltr">POF<br clear="all"><div><br>-- <br>
263
+        <div class="gmail_signature" data-smartmail="gmail_signature">
264
+        <div dir="ltr">Voici Ma signature. En HTML <br><ol>
265
+        <li>Plop</li>
266
+        <li>Plip</li>
267
+        <li>Plop<br>
268
+        </li></ol></div></div></div></div>
269
+        '''
270
+        mail = ParsedHTMLMail(text_and_signature)
271
+        elements = mail.get_elements()
272
+        assert len(elements) == 2
273
+        assert elements[0].part_type == BodyMailPartType.Main
274
+        assert elements[1].part_type == BodyMailPartType.Signature
275
+
276
+        text_and_quote = '''
277
+        <div dir="ltr">Réponse<br>
278
+        <div class="gmail_extra"><br>
279
+        <div class="gmail_quote">Le 28 novembre 2017 à 10:29, John Doe <span
280
+        dir="ltr">&lt;<a href="mailto:bidule@localhost.fr"
281
+        target="_blank">bidule@localhost.fr</a>&gt;</span>
282
+        a écrit :<br>
283
+        <blockquote class="gmail_quote" style="margin:0 0 0
284
+        .8ex;border-left:1px #ccc solid;padding-left:1ex">Voici ma réponse<br>
285
+        <br><br>
286
+        Le 28/11/2017 à 10:05, Foo Bar a écrit&nbsp;:<br>
287
+        <blockquote class="gmail_quote" style="margin:0 0 0
288
+        .8ex;border-left:1px #ccc solid;padding-left:1ex">
289
+        Voici le texte<span class="HOEnZb"><font color="#888888"><br>
290
+        </font></span></blockquote>
291
+        <span class="HOEnZb"><font color="#888888">
292
+        <br>
293
+        -- <br>
294
+        TEST DE signature<br>
295
+        </font></span></blockquote>
296
+        </div><br></div></div>
297
+        '''
298
+        mail = ParsedHTMLMail(text_and_quote)
299
+        elements = mail.get_elements()
300
+        assert len(elements) == 2
301
+        assert elements[0].part_type == BodyMailPartType.Main
302
+        assert elements[1].part_type == BodyMailPartType.Quote
303
+
304
+        text_quote_text = '''
305
+              <div dir="ltr">Avant<br>
306
+              <div class="gmail_extra"><br>
307
+              <div class="gmail_quote">Le 28 novembre 2017 à 10:29, John Doe 
308
+              <span dir="ltr">&lt;<a href="mailto:bidule@localhost.fr"
309
+              target="_blank">bidule@localhost.fr</a>&gt;</span>
310
+              a écrit :<br>
311
+              <blockquote class="gmail_quote" style="margin:0 0 0
312
+              .8ex;border-left:1px #ccc solid;padding-left:1ex">Voici ma
313
+              réponse<br>
314
+              <br>
315
+              <br>
316
+              Le 28/11/2017 à 10:05, Foo Bar a écrit&nbsp;:<br>
317
+              <blockquote class="gmail_quote" style="margin:0 0 0
318
+              .8ex;border-left:1px #ccc solid;padding-left:1ex">
319
+              Voici le texte<span class="HOEnZb"><font color="#888888"><br>
320
+              </font></span></blockquote>
321
+              <span class="HOEnZb"><font color="#888888">
322
+              <br>
323
+              -- <br>
324
+              TEST DE signature<br>
325
+              </font></span></blockquote>
326
+              </div>
327
+              <br>
328
+              </div>
329
+              <div class="gmail_extra">Aprés<br>
330
+              </div>
331
+              </div>
332
+              '''
333
+
334
+        mail = ParsedHTMLMail(text_quote_text)
335
+        elements = mail.get_elements()
336
+        assert len(elements) == 3
337
+        assert elements[0].part_type == BodyMailPartType.Main
338
+        assert elements[1].part_type == BodyMailPartType.Quote
339
+        assert elements[2].part_type == BodyMailPartType.Main
340
+
341
+
342
+        text_quote_signature = '''
343
+        <div dir="ltr">Hey !<br>
344
+                 </div>
345
+                 <div class="gmail_extra"><br>
346
+                 <div class="gmail_quote">Le 28 novembre 2017 à 10:29,
347
+                  John Doe <span
348
+                 dir="ltr">&lt;<a href="mailto:bidule@localhost.fr"
349
+                 target="_blank">bidule@localhost.fr</a>&gt;</span>
350
+                 a écrit :<br>
351
+                 <blockquote class="gmail_quote" style="margin:0 0 0
352
+                 .8ex;border-left:1px #ccc solid;padding-left:1ex">Voici ma
353
+                 réponse<br>
354
+                 <br>
355
+                 <br>
356
+                  Le 28/11/2017 à 10:05, Foo Bar a écrit&nbsp;:<br>
357
+                  <blockquote class="gmail_quote" style="margin:0 0 0
358
+                  .8ex;border-left:1px #ccc solid;padding-left:1ex">
359
+                  Voici le texte<span class="HOEnZb"><font color="#888888"><br>
360
+                  </font></span></blockquote>
361
+                  <span class="HOEnZb"><font color="#888888">
362
+                  <br>
363
+                  -- <br>
364
+                  TEST DE signature<br>
365
+                  </font></span></blockquote>
366
+                  </div>
367
+                  <br>
368
+                  <br clear="all">
369
+                  <br>
370
+                  -- <br>
371
+                  <div class="gmail_signature" data-smartmail="gmail_signature">
372
+                  <div dir="ltr">Voici Ma signature. En HTML <br>
373
+                  <ol>
374
+                  <li>Plop</li>
375
+                  <li>Plip</li>
376
+                  <li>Plop<br>
377
+                  </li>
378
+                  </ol>
379
+                  </div>
380
+                  </div>
381
+                  </div>
382
+                 '''
383
+
384
+        # INFO - G.M - 2017-11-28 -
385
+        # For now, a Gmail quote followed by a signature block is parsed as
386
+        # one single Quote block.
387
+        mail = ParsedHTMLMail(text_quote_signature)
388
+        elements = mail.get_elements()
389
+        assert len(elements) == 2
390
+        assert elements[0].part_type == BodyMailPartType.Main
391
+        assert elements[1].part_type == BodyMailPartType.Quote
392
+
393
+        text_quote_text_sign = '''
394
+        <div dir="ltr">Test<br>
395
+        <div class="gmail_extra"><br>
396
+        <div class="gmail_quote">Le 28 novembre 2017 à 10:29, John Doe <span
397
+        dir="ltr">&lt;<a href="mailto:bidule@localhost.fr"
398
+        target="_blank">bidule@localhost.fr</a>&gt;</span>
399
+        a écrit :<br>
400
+        <blockquote class="gmail_quote" style="margin:0 0 0
401
+        .8ex;border-left:1px #ccc solid;padding-left:1ex">Voici ma
402
+        réponse<br>
403
+        <br>
404
+        <br>
405
+        Le 28/11/2017 à 10:05, Foo Bar a écrit&nbsp;:<br>
406
+        <blockquote class="gmail_quote" style="margin:0 0 0
407
+        .8ex;border-left:1px #ccc solid;padding-left:1ex">
408
+        Voici le texte<span class="HOEnZb"><font color="#888888"><br>
409
+        </font></span></blockquote>
410
+        <span class="HOEnZb"><font color="#888888">
411
+        <br>
412
+        -- <br>
413
+        TEST DE signature<br>
414
+        </font></span></blockquote>
415
+        </div>
416
+        <br>
417
+        <br>
418
+        </div>
419
+        <div class="gmail_extra">RE test<br clear="all">
420
+        </div>
421
+        <div class="gmail_extra"><br>
422
+        -- <br>
423
+        <div class="gmail_signature" data-smartmail="gmail_signature">
424
+        <div dir="ltr">Voici Ma signature. En HTML <br>
425
+        <ol>
426
+        <li>Plop</li>
427
+        <li>Plip</li>
428
+        <li>Plop<br>
429
+        </li>
430
+        </ol>
431
+        </div>
432
+        </div>
433
+        </div>
434
+        </div>
435
+        '''
436
+
437
+        mail = ParsedHTMLMail(text_quote_text_sign)
438
+        elements = mail.get_elements()
439
+        assert len(elements) == 4
440
+        assert elements[0].part_type == BodyMailPartType.Main
441
+        assert elements[1].part_type == BodyMailPartType.Quote
442
+        assert elements[2].part_type == BodyMailPartType.Main
443
+        assert elements[3].part_type == BodyMailPartType.Signature
444
+
445
+    def test_other__check_thunderbird_mail(self):  # Six scenarios on moz-* markup (Thunderbird, per the test name): check get_elements() part types and their order
446
+
447
+        text_only = '''Coucou<br><br><br>'''
448
+        mail = ParsedHTMLMail(text_only)  # Case 1: plain text with trailing <br> only
449
+        elements = mail.get_elements()
450
+        assert len(elements) == 1
451
+        assert elements[0].part_type == BodyMailPartType.Main
452
+
453
+        text_and_signature = '''
454
+        <p>Test<br>
455
+        </p>
456
+        <div class="moz-signature">-- <br>
457
+          TEST DE signature</div>
458
+        '''
459
+        mail = ParsedHTMLMail(text_and_signature)  # Case 2: text followed by a moz-signature div
460
+        elements = mail.get_elements()
461
+        assert len(elements) == 2
462
+        assert elements[0].part_type == BodyMailPartType.Main
463
+        assert elements[1].part_type == BodyMailPartType.Signature
464
+
465
+        text_and_quote = '''
466
+            <p>Pof<br>
467
+            </p>
468
+            <br>
469
+            <div class="moz-cite-prefix">Le 28/11/2017 à 11:21, John Doe a
470
+              écrit&nbsp;:<br>
471
+            </div>
472
+            <blockquote type="cite"
473
+              cite="mid:658592c1-14de-2958-5187-3571edea0aac@localhost.fr">
474
+              <meta http-equiv="Context-Type" 
475
+              content="text/html; charset=utf-8">
476
+              <p>Test<br>
477
+              </p>
478
+              <div class="moz-signature">-- <br>
479
+                TEST DE signature</div>
480
+            </blockquote>
481
+            <br>
482
+        '''
483
+        mail = ParsedHTMLMail(text_and_quote)  # Case 3: text then a cite blockquote; the signature inside the quote must not yield its own part
484
+        elements = mail.get_elements()
485
+        assert len(elements) == 2
486
+        assert elements[0].part_type == BodyMailPartType.Main
487
+        assert elements[1].part_type == BodyMailPartType.Quote
488
+
489
+        text_quote_text = '''
490
+        <p>Pof<br>
491
+        </p>
492
+        <br>
493
+        <div class="moz-cite-prefix">Le 28/11/2017 à 11:54, 
494
+         Bidule a
495
+          écrit&nbsp;:<br>
496
+        </div>
497
+        <blockquote type="cite"
498
+          cite="mid:b541b451-bb31-77a4-45b9-ad89969d7962@localhost.fr">
499
+          <meta http-equiv="Context-Type" 
500
+          content="text/html; charset=utf-8">
501
+          <p>Pof<br>
502
+          </p>
503
+          <br>
504
+          <div class="moz-cite-prefix">Le 28/11/2017 à 11:21, John Doe a
505
+            écrit&nbsp;:<br>
506
+          </div>
507
+          <blockquote type="cite"
508
+            cite="mid:658592c1-14de-2958-5187-3571edea0aac@localhost.fr">
509
+            <p>Test<br>
510
+            </p>
511
+            <div class="moz-signature">-- <br>
512
+              TEST DE signature</div>
513
+          </blockquote>
514
+          <br>
515
+        </blockquote>
516
+        Pif<br>
517
+        '''
518
+
519
+        mail = ParsedHTMLMail(text_quote_text)  # Case 4: text, nested cite blockquotes, then trailing text after the quote
520
+        elements = mail.get_elements()
521
+        assert len(elements) == 3
522
+        assert elements[0].part_type == BodyMailPartType.Main
523
+        assert elements[1].part_type == BodyMailPartType.Quote
524
+        assert elements[2].part_type == BodyMailPartType.Main
525
+
526
+        text_quote_signature = '''
527
+        <p>Coucou<br>
528
+        </p>
529
+        <br>
530
+        <div class="moz-cite-prefix">Le 28/11/2017 à 11:22, Bidule a
531
+        écrit&nbsp;:<br>
532
+        </div>
533
+        <blockquote type="cite"
534
+        cite="mid:4e6923e2-796d-eccf-84b7-6824da4151ee@localhost.fr">Réponse <br>
535
+        <br>
536
+        Le 28/11/2017 à 11:21, John Doe a écrit&nbsp;: <br>
537
+        <blockquote type="cite"> <br>
538
+        Test <br>
539
+        <br>
540
+        --&nbsp;<br>
541
+        TEST DE signature <br>
542
+        </blockquote>
543
+        <br>
544
+        </blockquote>
545
+        <br>
546
+        <div class="moz-signature">-- <br>
547
+        TEST DE signature</div>
548
+        '''
549
+
550
+        mail = ParsedHTMLMail(text_quote_signature)  # Case 5: text, nested quotes, then a top-level moz-signature div after the quote
551
+        elements = mail.get_elements()
552
+        assert len(elements) == 3
553
+        assert elements[0].part_type == BodyMailPartType.Main
554
+        assert elements[1].part_type == BodyMailPartType.Quote
555
+        assert elements[2].part_type == BodyMailPartType.Signature
556
+
557
+        text_quote_text_sign = '''
558
+        <p>Avant<br>
559
+        </p>
560
+        <br>
561
+        <div class="moz-cite-prefix">Le 28/11/2017 à 11:19, Bidule a
562
+          écrit&nbsp;:<br>
563
+        </div>
564
+        <blockquote type="cite"
565
+          cite="mid:635df73c-d3c9-f2e9-2304-24ff536bfa16@localhost.fr">Coucou 
566
+          <br><br>
567
+        </blockquote>
568
+        Aprés<br>
569
+        <br>
570
+        <div class="moz-signature">-- <br>
571
+          TEST DE signature</div>
572
+        '''
573
+
574
+        mail = ParsedHTMLMail(text_quote_text_sign)  # Case 6: text, quote, more text, then a moz-signature div
575
+        elements = mail.get_elements()
576
+        assert len(elements) == 4
577
+        assert elements[0].part_type == BodyMailPartType.Main
578
+        assert elements[1].part_type == BodyMailPartType.Quote
579
+        assert elements[2].part_type == BodyMailPartType.Main
580
+        assert elements[3].part_type == BodyMailPartType.Signature
581
+
582
+