Browse Source

PEP8, renaming, comments

Guénaël Muller 6 years ago
parent
commit
6c20a1594a
1 changed files with 29 additions and 17 deletions
  1. 29 17
      tracim/tracim/lib/email_body_parser.py

+ 29 - 17
tracim/tracim/lib/email_body_parser.py View File

4
 from bs4 import Tag
4
 from bs4 import Tag
5
 from bs4 import NavigableString
5
 from bs4 import NavigableString
6
 
6
 
7
+# BodyParts and Body Parts Objects #
7
 
8
 
8
 class BodyMailPartType(object):
9
 class BodyMailPartType(object):
9
     Signature = 'sign'
10
     Signature = 'sign'
101
 class SignatureIndexError(Exception):
102
 class SignatureIndexError(Exception):
102
     pass
103
     pass
103
 
104
 
105
+# Elements Checkers #
104
 
106
 
105
-class ProprietaryHTMLProperties(object):
107
+class ProprietaryHTMLAttrValues(object):
108
+    """
109
+    These are all the proprietary (mail-client-specific) HTML attribute values
110
+    we need in order to check HTML elements.
111
+    """
106
     # Gmail
112
     # Gmail
107
     Gmail_extras_class = 'gmail_extra'
113
     Gmail_extras_class = 'gmail_extra'
108
     Gmail_quote_class = 'gmail_quote'
114
     Gmail_quote_class = 'gmail_quote'
122
     Roundcube_quote_prefix_class = 'reply-intro'
128
     Roundcube_quote_prefix_class = 'reply-intro'
123
 
129
 
124
 
130
 
125
-
126
 class HtmlChecker(object):
131
 class HtmlChecker(object):
127
 
132
 
128
     @classmethod
133
     @classmethod
171
         return cls._has_attr_value(
176
         return cls._has_attr_value(
172
             elem,
177
             elem,
173
             'class',
178
             'class',
174
-            ProprietaryHTMLProperties.Thunderbird_quote_prefix_class)
179
+            ProprietaryHTMLAttrValues.Thunderbird_quote_prefix_class)
175
 
180
 
176
     @classmethod
181
     @classmethod
177
     def _is_gmail_quote(
182
     def _is_gmail_quote(
181
         if cls._has_attr_value(
186
         if cls._has_attr_value(
182
                 elem,
187
                 elem,
183
                 'class',
188
                 'class',
184
-                ProprietaryHTMLProperties.Gmail_extras_class):
189
+                ProprietaryHTMLAttrValues.Gmail_extras_class):
185
             for child in elem.children:
190
             for child in elem.children:
186
                 if cls._has_attr_value(
191
                 if cls._has_attr_value(
187
                         child,
192
                         child,
188
                         'class',
193
                         'class',
189
-                        ProprietaryHTMLProperties.Gmail_quote_class):
194
+                        ProprietaryHTMLAttrValues.Gmail_quote_class):
190
                     return True
195
                     return True
191
         return False
196
         return False
192
 
197
 
198
         if cls._has_attr_value(
203
         if cls._has_attr_value(
199
                 elem,
204
                 elem,
200
                 'id',
205
                 'id',
201
-                ProprietaryHTMLProperties.Outlook_com_quote_id):
206
+                ProprietaryHTMLAttrValues.Outlook_com_quote_id):
202
             return True
207
             return True
203
         return False
208
         return False
204
 
209
 
210
         return cls._has_attr_value(
215
         return cls._has_attr_value(
211
             elem,
216
             elem,
212
             'class',
217
             'class',
213
-            ProprietaryHTMLProperties.Yahoo_quote_class)
218
+            ProprietaryHTMLAttrValues.Yahoo_quote_class)
214
 
219
 
215
     @classmethod
220
     @classmethod
216
     def _is_roundcube_quote(
221
     def _is_roundcube_quote(
220
         return cls._has_attr_value(
225
         return cls._has_attr_value(
221
             elem,
226
             elem,
222
             'id',
227
             'id',
223
-            ProprietaryHTMLProperties.Roundcube_quote_prefix_class)
228
+            ProprietaryHTMLAttrValues.Roundcube_quote_prefix_class)
224
 
229
 
225
 
230
 
226
 class HtmlMailSignatureChecker(HtmlChecker):
231
 class HtmlMailSignatureChecker(HtmlChecker):
242
         return cls._has_attr_value(
247
         return cls._has_attr_value(
243
             elem,
248
             elem,
244
             'class',
249
             'class',
245
-            ProprietaryHTMLProperties.Thunderbird_signature_class)
250
+            ProprietaryHTMLAttrValues.Thunderbird_signature_class)
246
 
251
 
247
     @classmethod
252
     @classmethod
248
     def _is_gmail_signature(
253
     def _is_gmail_signature(
252
         if cls._has_attr_value(
257
         if cls._has_attr_value(
253
                 elem,
258
                 elem,
254
                 'class',
259
                 'class',
255
-                ProprietaryHTMLProperties.Gmail_signature_class):
260
+                ProprietaryHTMLAttrValues.Gmail_signature_class):
256
             return True
261
             return True
257
         if cls._has_attr_value(
262
         if cls._has_attr_value(
258
                 elem,
263
                 elem,
259
                 'class',
264
                 'class',
260
-                ProprietaryHTMLProperties.Gmail_extras_class):
265
+                ProprietaryHTMLAttrValues.Gmail_extras_class):
261
             for child in elem.children:
266
             for child in elem.children:
262
                 if cls._has_attr_value(
267
                 if cls._has_attr_value(
263
                         child,
268
                         child,
264
                         'class',
269
                         'class',
265
-                        ProprietaryHTMLProperties.Gmail_signature_class):
270
+                        ProprietaryHTMLAttrValues.Gmail_signature_class):
266
                     return True
271
                     return True
267
         if isinstance(elem, Tag) and elem.name.lower() == 'div':
272
         if isinstance(elem, Tag) and elem.name.lower() == 'div':
268
             for child in elem.children:
273
             for child in elem.children:
269
                 if cls._has_attr_value(
274
                 if cls._has_attr_value(
270
                         child,
275
                         child,
271
                         'class',
276
                         'class',
272
-                        ProprietaryHTMLProperties.Gmail_signature_class):
277
+                        ProprietaryHTMLAttrValues.Gmail_signature_class):
273
                     return True
278
                     return True
274
         return False
279
         return False
275
 
280
 
281
         if cls._has_attr_value(
286
         if cls._has_attr_value(
282
                 elem,
287
                 elem,
283
                 'id',
288
                 'id',
284
-                ProprietaryHTMLProperties.Outlook_com_signature_id):
289
+                ProprietaryHTMLAttrValues.Outlook_com_signature_id):
285
             return True
290
             return True
286
         return False
291
         return False
287
 
292
 
293
+# ParsedHTMLMail #
294
+
288
 
295
 
289
 class PreSanitizeConfig(object):
296
 class PreSanitizeConfig(object):
297
+    """
298
+    To avoid problems, the HTML needs to be sanitized a bit during parsing in
299
+    order to distinguish Main, Quote and Signature elements.
300
+    """
290
     Ignored_tags = ['br', 'hr', 'script', 'style']
301
     Ignored_tags = ['br', 'hr', 'script', 'style']
291
-    meta_tag = ['body','div']
302
+    meta_tag = ['body', 'div']
303
+
292
 
304
 
293
 class ParsedHTMLMail(object):
305
 class ParsedHTMLMail(object):
294
     """
306
     """
295
     Parse HTML Mail depending of some rules.
307
     Parse HTML Mail depending of some rules.
296
     Distinct part of html mail body using BodyMailParts object and
308
     Distinct part of html mail body using BodyMailParts object and
297
-    process different rules.
309
+    process different rules using HtmlChecker(s).
298
     """
310
     """
299
 
311
 
300
     def __init__(self, html_body: str):
312
     def __init__(self, html_body: str):
334
             # if Text -> Signature -> Quote Mail
346
             # if Text -> Signature -> Quote Mail
335
             # Text and signature are wrapped into divtagdefaultwrapper
347
             # Text and signature are wrapped into divtagdefaultwrapper
336
             if tag.attrs.get('id'):
348
             if tag.attrs.get('id'):
337
-                if ProprietaryHTMLProperties.Outlook_com_wrapper_id\
349
+                if ProprietaryHTMLAttrValues.Outlook_com_wrapper_id\
338
                         in tag.attrs['id']:
350
                         in tag.attrs['id']:
339
                     tag.unwrap()
351
                     tag.unwrap()
340
         return tree
352
         return tree