|
@@ -47,7 +47,8 @@ class BodyMailParts(object):
|
47
|
47
|
|
48
|
48
|
def __delitem__(self, index) -> None:
|
49
|
49
|
del self._list[index]
|
50
|
|
- # Todo : check consistance
|
|
50
|
+ # FIXME - G.M - 2017-11-27 - Preserve BodyMailParts consistency
|
|
51
|
+ # check the elements before and after index and merge them if necessary.
|
51
|
52
|
|
52
|
53
|
def append(self, value) -> None:
|
53
|
54
|
BodyMailParts._check_value(value)
|
|
@@ -100,6 +101,24 @@ class BodyMailParts(object):
|
100
|
101
|
class SignatureIndexError(Exception):
|
101
|
102
|
pass
|
102
|
103
|
|
|
104
|
+class ProprietaryHTMLProperties(object):
|
|
105
|
+ # Gmail
|
|
106
|
+ Gmail_extras_class = 'gmail_extra'
|
|
107
|
+ Gmail_quote_class = 'gmail_quote'
|
|
108
|
+ Gmail_signature_class = 'gmail_signature'
|
|
109
|
+ # Thunderbird
|
|
110
|
+ Thunderbird_quote_prefix_class = 'moz-cite-prefix'
|
|
111
|
+ Thunderbird_signature_class = 'moz-signature'
|
|
112
|
+ # Outlook.com
|
|
113
|
+ Outlook_com_quote_id = 'divRplyFwdMsg'
|
|
114
|
+ Outlook_com_signature_id = 'Signature'
|
|
115
|
+ Outlook_com_wrapper_id = 'divtagdefaultwrapper'
|
|
116
|
+ # Yahoo
|
|
117
|
+ Yahoo_quote_class = 'yahoo_quoted'
|
|
118
|
+ # Roundcube
|
|
119
|
+ # INFO - G.M - 2017-11-29 - New marker (matched on the 'id' attribute, despite the constant's name)
|
|
120
|
+ # see : https://github.com/roundcube/roundcubemail/issues/6049
|
|
121
|
+ Roundcube_quote_prefix_class = 'reply-intro'
|
103
|
122
|
|
104
|
123
|
class HtmlChecker(object):
|
105
|
124
|
|
|
@@ -146,16 +165,25 @@ class HtmlMailQuoteChecker(HtmlChecker):
|
146
|
165
|
cls,
|
147
|
166
|
elem: typing.Union[Tag, NavigableString]
|
148
|
167
|
) -> bool:
|
149
|
|
- return cls._has_attr_value(elem, 'class', 'moz-cite-prefix')
|
|
168
|
+ return cls._has_attr_value(
|
|
169
|
+ elem,
|
|
170
|
+ 'class',
|
|
171
|
+ ProprietaryHTMLProperties.Thunderbird_quote_prefix_class)
|
150
|
172
|
|
151
|
173
|
@classmethod
|
152
|
174
|
def _is_gmail_quote(
|
153
|
175
|
cls,
|
154
|
176
|
elem: typing.Union[Tag, NavigableString]
|
155
|
177
|
) -> bool:
|
156
|
|
- if cls._has_attr_value(elem, 'class', 'gmail_extra'):
|
|
178
|
+ if cls._has_attr_value(
|
|
179
|
+ elem,
|
|
180
|
+ 'class',
|
|
181
|
+ ProprietaryHTMLProperties.Gmail_extras_class):
|
157
|
182
|
for child in elem.children:
|
158
|
|
- if cls._has_attr_value(child, 'class', 'gmail_quote'):
|
|
183
|
+ if cls._has_attr_value(
|
|
184
|
+ child,
|
|
185
|
+ 'class',
|
|
186
|
+ ProprietaryHTMLProperties.Gmail_quote_class):
|
159
|
187
|
return True
|
160
|
188
|
return False
|
161
|
189
|
|
|
@@ -164,7 +192,10 @@ class HtmlMailQuoteChecker(HtmlChecker):
|
164
|
192
|
cls,
|
165
|
193
|
elem: typing.Union[Tag, NavigableString]
|
166
|
194
|
) -> bool:
|
167
|
|
- if cls._has_attr_value(elem, 'id', 'divRplyFwdMsg'):
|
|
195
|
+ if cls._has_attr_value(
|
|
196
|
+ elem,
|
|
197
|
+ 'id',
|
|
198
|
+ ProprietaryHTMLProperties.Outlook_com_quote_id):
|
168
|
199
|
return True
|
169
|
200
|
return False
|
170
|
201
|
|
|
@@ -173,14 +204,20 @@ class HtmlMailQuoteChecker(HtmlChecker):
|
173
|
204
|
cls,
|
174
|
205
|
elem: typing.Union[Tag, NavigableString]
|
175
|
206
|
) -> bool:
|
176
|
|
- return cls._has_attr_value(elem, 'class', 'yahoo_quoted')
|
|
207
|
+ return cls._has_attr_value(
|
|
208
|
+ elem,
|
|
209
|
+ 'class',
|
|
210
|
+ ProprietaryHTMLProperties.Yahoo_quote_class)
|
177
|
211
|
|
178
|
212
|
@classmethod
|
179
|
213
|
def _is_roundcube_quote(
|
180
|
214
|
cls,
|
181
|
215
|
elem: typing.Union[Tag, NavigableString]
|
182
|
216
|
) -> bool:
|
183
|
|
- return cls._has_attr_value(elem, 'id', 'reply-intro')
|
|
217
|
+ return cls._has_attr_value(
|
|
218
|
+ elem,
|
|
219
|
+ 'id',
|
|
220
|
+ ProprietaryHTMLProperties.Roundcube_quote_prefix_class)
|
184
|
221
|
|
185
|
222
|
|
186
|
223
|
class HtmlMailSignatureChecker(HtmlChecker):
|
|
@@ -199,24 +236,37 @@ class HtmlMailSignatureChecker(HtmlChecker):
|
199
|
236
|
cls,
|
200
|
237
|
elem: typing.Union[Tag, NavigableString]
|
201
|
238
|
) -> bool:
|
202
|
|
- return cls._has_attr_value(elem,
|
203
|
|
- 'class',
|
204
|
|
- 'moz-signature')
|
|
239
|
+ return cls._has_attr_value(
|
|
240
|
+ elem,
|
|
241
|
+ 'class',
|
|
242
|
+ ProprietaryHTMLProperties.Thunderbird_signature_class)
|
205
|
243
|
|
206
|
244
|
@classmethod
|
207
|
245
|
def _is_gmail_signature(
|
208
|
246
|
cls,
|
209
|
247
|
elem: typing.Union[Tag, NavigableString]
|
210
|
248
|
) -> bool:
|
211
|
|
- if cls._has_attr_value(elem, 'class', 'gmail_signature'):
|
|
249
|
+ if cls._has_attr_value(
|
|
250
|
+ elem,
|
|
251
|
+ 'class',
|
|
252
|
+ ProprietaryHTMLProperties.Gmail_signature_class):
|
212
|
253
|
return True
|
213
|
|
- if cls._has_attr_value(elem, 'class', 'gmail_extra'):
|
|
254
|
+ if cls._has_attr_value(
|
|
255
|
+ elem,
|
|
256
|
+ 'class',
|
|
257
|
+ ProprietaryHTMLProperties.Gmail_extras_class):
|
214
|
258
|
for child in elem.children:
|
215
|
|
- if cls._has_attr_value(child, 'class', 'gmail_signature'):
|
|
259
|
+ if cls._has_attr_value(
|
|
260
|
+ child,
|
|
261
|
+ 'class',
|
|
262
|
+ ProprietaryHTMLProperties.Gmail_signature_class):
|
216
|
263
|
return True
|
217
|
264
|
if isinstance(elem, Tag) and elem.name.lower() == 'div':
|
218
|
265
|
for child in elem.children:
|
219
|
|
- if cls._has_attr_value(child, 'class', 'gmail_signature'):
|
|
266
|
+ if cls._has_attr_value(
|
|
267
|
+ child,
|
|
268
|
+ 'class',
|
|
269
|
+ ProprietaryHTMLProperties.Gmail_signature_class):
|
220
|
270
|
return True
|
221
|
271
|
return False
|
222
|
272
|
|
|
@@ -225,7 +275,10 @@ class HtmlMailSignatureChecker(HtmlChecker):
|
225
|
275
|
cls,
|
226
|
276
|
elem: typing.Union[Tag, NavigableString]
|
227
|
277
|
) -> bool:
|
228
|
|
- if cls._has_attr_value(elem, 'id', 'Signature'):
|
|
278
|
+ if cls._has_attr_value(
|
|
279
|
+ elem,
|
|
280
|
+ 'id',
|
|
281
|
+ ProprietaryHTMLProperties.Outlook_com_signature_id):
|
229
|
282
|
return True
|
230
|
283
|
return False
|
231
|
284
|
|
|
@@ -275,7 +328,8 @@ class ParsedHTMLMail(object):
|
275
|
328
|
# if Text -> Signature -> Quote Mail
|
276
|
329
|
# Text and signature are wrapped into divtagdefaultwrapper
|
277
|
330
|
if tag.attrs.get('id'):
|
278
|
|
- if 'divtagdefaultwrapper' in tag.attrs['id']:
|
|
331
|
+ if ProprietaryHTMLProperties.Outlook_com_wrapper_id\
|
|
332
|
+ in tag.attrs['id']:
|
279
|
333
|
tag.unwrap()
|
280
|
334
|
# Hack - G.M - 2017-11-28 : remove tag with no enclosure
|
281
|
335
|
# <br> and <hr> tag alone broke html.parser tree,
|