Browse Source

Refactoring

Guénaël Muller 6 years ago
parent
commit
2d8878304f
1 changed file with 70 additions and 16 deletions
  1. 70 16
      tracim/tracim/lib/email_body_parser.py

+ 70 - 16
tracim/tracim/lib/email_body_parser.py View File

47
 
47
 
48
     def __delitem__(self, index) -> None:
48
     def __delitem__(self, index) -> None:
49
         del self._list[index]
49
         del self._list[index]
50
-        # Todo : check consistance
50
+        # FIXME - G.M - 2017-11-27 - Preserve BodyMailParts consistency
51
+        # Check the elements before and after index; merge them if necessary.
51
 
52
 
52
     def append(self, value) -> None:
53
     def append(self, value) -> None:
53
         BodyMailParts._check_value(value)
54
         BodyMailParts._check_value(value)
100
 class SignatureIndexError(Exception):
101
 class SignatureIndexError(Exception):
101
     pass
102
     pass
102
 
103
 
104
+class ProprietaryHTMLProperties(object):
105
+    # Gmail
106
+    Gmail_extras_class = 'gmail_extra'
107
+    Gmail_quote_class = 'gmail_quote'
108
+    Gmail_signature_class = 'gmail_signature'
109
+    # Thunderbird
110
+    Thunderbird_quote_prefix_class = 'moz-cite-prefix'
111
+    Thunderbird_signature_class = 'moz-signature'
112
+    # Outlook.com
113
+    Outlook_com_quote_id = 'divRplyFwdMsg'
114
+    Outlook_com_signature_id = 'Signature'
115
+    Outlook_com_wrapper_id = 'divtagdefaultwrapper'
116
+    # Yahoo
117
+    Yahoo_quote_class = 'yahoo_quoted'
118
+    # Roundcube
119
+    # INFO - G.M - 2017-11-29 - New tag
120
+    # see : https://github.com/roundcube/roundcubemail/issues/6049
121
+    Roundcube_quote_prefix_class = 'reply-intro'
103
 
122
 
104
 class HtmlChecker(object):
123
 class HtmlChecker(object):
105
 
124
 
146
             cls,
165
             cls,
147
             elem: typing.Union[Tag, NavigableString]
166
             elem: typing.Union[Tag, NavigableString]
148
     ) -> bool:
167
     ) -> bool:
149
-        return cls._has_attr_value(elem, 'class', 'moz-cite-prefix')
168
+        return cls._has_attr_value(
169
+            elem,
170
+            'class',
171
+            ProprietaryHTMLProperties.Thunderbird_quote_prefix_class)
150
 
172
 
151
     @classmethod
173
     @classmethod
152
     def _is_gmail_quote(
174
     def _is_gmail_quote(
153
             cls,
175
             cls,
154
             elem: typing.Union[Tag, NavigableString]
176
             elem: typing.Union[Tag, NavigableString]
155
     ) -> bool:
177
     ) -> bool:
156
-        if cls._has_attr_value(elem, 'class', 'gmail_extra'):
178
+        if cls._has_attr_value(
179
+                elem,
180
+                'class',
181
+                ProprietaryHTMLProperties.Gmail_extras_class):
157
             for child in elem.children:
182
             for child in elem.children:
158
-                if cls._has_attr_value(child, 'class', 'gmail_quote'):
183
+                if cls._has_attr_value(
184
+                    child,
185
+                    'class',
186
+                    ProprietaryHTMLProperties.Gmail_quote_class):
159
                     return True
187
                     return True
160
         return False
188
         return False
161
 
189
 
164
         cls,
192
         cls,
165
         elem: typing.Union[Tag, NavigableString]
193
         elem: typing.Union[Tag, NavigableString]
166
     ) -> bool:
194
     ) -> bool:
167
-        if cls._has_attr_value(elem, 'id', 'divRplyFwdMsg'):
195
+        if cls._has_attr_value(
196
+                elem,
197
+                'id',
198
+                ProprietaryHTMLProperties.Outlook_com_quote_id):
168
             return True
199
             return True
169
         return False
200
         return False
170
 
201
 
173
             cls,
204
             cls,
174
             elem: typing.Union[Tag, NavigableString]
205
             elem: typing.Union[Tag, NavigableString]
175
     ) -> bool:
206
     ) -> bool:
176
-        return cls._has_attr_value(elem, 'class', 'yahoo_quoted')
207
+        return cls._has_attr_value(
208
+            elem,
209
+            'class',
210
+            ProprietaryHTMLProperties.Yahoo_quote_class)
177
 
211
 
178
     @classmethod
212
     @classmethod
179
     def _is_roundcube_quote(
213
     def _is_roundcube_quote(
180
             cls,
214
             cls,
181
             elem: typing.Union[Tag, NavigableString]
215
             elem: typing.Union[Tag, NavigableString]
182
     ) -> bool:
216
     ) -> bool:
183
-        return cls._has_attr_value(elem, 'id', 'reply-intro')
217
+        return cls._has_attr_value(
218
+            elem,
219
+            'id',
220
+            ProprietaryHTMLProperties.Roundcube_quote_prefix_class)
184
 
221
 
185
 
222
 
186
 class HtmlMailSignatureChecker(HtmlChecker):
223
 class HtmlMailSignatureChecker(HtmlChecker):
199
             cls,
236
             cls,
200
             elem: typing.Union[Tag, NavigableString]
237
             elem: typing.Union[Tag, NavigableString]
201
     ) -> bool:
238
     ) -> bool:
202
-        return cls._has_attr_value(elem,
203
-                                   'class',
204
-                                   'moz-signature')
239
+        return cls._has_attr_value(
240
+            elem,
241
+            'class',
242
+            ProprietaryHTMLProperties.Thunderbird_signature_class)
205
 
243
 
206
     @classmethod
244
     @classmethod
207
     def _is_gmail_signature(
245
     def _is_gmail_signature(
208
             cls,
246
             cls,
209
             elem: typing.Union[Tag, NavigableString]
247
             elem: typing.Union[Tag, NavigableString]
210
     ) -> bool:
248
     ) -> bool:
211
-        if cls._has_attr_value(elem, 'class', 'gmail_signature'):
249
+        if cls._has_attr_value(
250
+                elem,
251
+                'class',
252
+                ProprietaryHTMLProperties.Gmail_signature_class):
212
             return True
253
             return True
213
-        if cls._has_attr_value(elem, 'class', 'gmail_extra'):
254
+        if cls._has_attr_value(
255
+                elem,
256
+                'class',
257
+                ProprietaryHTMLProperties.Gmail_extras_class):
214
             for child in elem.children:
258
             for child in elem.children:
215
-                if cls._has_attr_value(child, 'class', 'gmail_signature'):
259
+                if cls._has_attr_value(
260
+                        child,
261
+                        'class',
262
+                        ProprietaryHTMLProperties.Gmail_signature_class):
216
                     return True
263
                     return True
217
         if isinstance(elem, Tag) and elem.name.lower() == 'div':
264
         if isinstance(elem, Tag) and elem.name.lower() == 'div':
218
             for child in elem.children:
265
             for child in elem.children:
219
-                if cls._has_attr_value(child, 'class', 'gmail_signature'):
266
+                if cls._has_attr_value(
267
+                        child,
268
+                        'class',
269
+                        ProprietaryHTMLProperties.Gmail_signature_class):
220
                     return True
270
                     return True
221
         return False
271
         return False
222
 
272
 
225
             cls,
275
             cls,
226
             elem: typing.Union[Tag, NavigableString]
276
             elem: typing.Union[Tag, NavigableString]
227
     ) -> bool:
277
     ) -> bool:
228
-        if cls._has_attr_value(elem, 'id', 'Signature'):
278
+        if cls._has_attr_value(
279
+                elem,
280
+                'id',
281
+                ProprietaryHTMLProperties.Outlook_com_signature_id):
229
             return True
282
             return True
230
         return False
283
         return False
231
 
284
 
275
             # if Text -> Signature -> Quote Mail
328
             # if Text -> Signature -> Quote Mail
276
             # Text and signature are wrapped into divtagdefaultwrapper
329
             # Text and signature are wrapped into divtagdefaultwrapper
277
             if tag.attrs.get('id'):
330
             if tag.attrs.get('id'):
278
-                if 'divtagdefaultwrapper' in tag.attrs['id']:
331
+                if ProprietaryHTMLProperties.Outlook_com_wrapper_id\
332
+                        in tag.attrs['id']:
279
                     tag.unwrap()
333
                     tag.unwrap()
280
             # Hack - G.M - 2017-11-28 : remove tag with no enclosure
334
             # Hack - G.M - 2017-11-28 : remove tag with no enclosure
281
             # <br> and <hr> tag alone broke html.parser tree,
335
             # <br> and <hr> tag alone broke html.parser tree,