Parcourir la source

PEP8, renaming, comments

Guénaël Muller il y a 6 ans
Parent
révision
6c20a1594a
1 fichiers modifiés avec 29 ajouts et 17 suppressions
  1. 29 17
      tracim/tracim/lib/email_body_parser.py

+ 29 - 17
tracim/tracim/lib/email_body_parser.py Voir le fichier

@@ -4,6 +4,7 @@ from bs4 import BeautifulSoup
4 4
 from bs4 import Tag
5 5
 from bs4 import NavigableString
6 6
 
7
+# BodyParts and Body Parts Objects #
7 8
 
8 9
 class BodyMailPartType(object):
9 10
     Signature = 'sign'
@@ -101,8 +102,13 @@ class BodyMailParts(object):
101 102
 class SignatureIndexError(Exception):
102 103
     pass
103 104
 
105
+# Elements Checkers #
104 106
 
105
-class ProprietaryHTMLProperties(object):
107
+class ProprietaryHTMLAttrValues(object):
108
+    """
109
+    These are all the proprietary (mail-client-specific) HTML attribute
110
+    values we need in order to check HTML elements.
111
+    """
106 112
     # Gmail
107 113
     Gmail_extras_class = 'gmail_extra'
108 114
     Gmail_quote_class = 'gmail_quote'
@@ -122,7 +128,6 @@ class ProprietaryHTMLProperties(object):
122 128
     Roundcube_quote_prefix_class = 'reply-intro'
123 129
 
124 130
 
125
-
126 131
 class HtmlChecker(object):
127 132
 
128 133
     @classmethod
@@ -171,7 +176,7 @@ class HtmlMailQuoteChecker(HtmlChecker):
171 176
         return cls._has_attr_value(
172 177
             elem,
173 178
             'class',
174
-            ProprietaryHTMLProperties.Thunderbird_quote_prefix_class)
179
+            ProprietaryHTMLAttrValues.Thunderbird_quote_prefix_class)
175 180
 
176 181
     @classmethod
177 182
     def _is_gmail_quote(
@@ -181,12 +186,12 @@ class HtmlMailQuoteChecker(HtmlChecker):
181 186
         if cls._has_attr_value(
182 187
                 elem,
183 188
                 'class',
184
-                ProprietaryHTMLProperties.Gmail_extras_class):
189
+                ProprietaryHTMLAttrValues.Gmail_extras_class):
185 190
             for child in elem.children:
186 191
                 if cls._has_attr_value(
187 192
                         child,
188 193
                         'class',
189
-                        ProprietaryHTMLProperties.Gmail_quote_class):
194
+                        ProprietaryHTMLAttrValues.Gmail_quote_class):
190 195
                     return True
191 196
         return False
192 197
 
@@ -198,7 +203,7 @@ class HtmlMailQuoteChecker(HtmlChecker):
198 203
         if cls._has_attr_value(
199 204
                 elem,
200 205
                 'id',
201
-                ProprietaryHTMLProperties.Outlook_com_quote_id):
206
+                ProprietaryHTMLAttrValues.Outlook_com_quote_id):
202 207
             return True
203 208
         return False
204 209
 
@@ -210,7 +215,7 @@ class HtmlMailQuoteChecker(HtmlChecker):
210 215
         return cls._has_attr_value(
211 216
             elem,
212 217
             'class',
213
-            ProprietaryHTMLProperties.Yahoo_quote_class)
218
+            ProprietaryHTMLAttrValues.Yahoo_quote_class)
214 219
 
215 220
     @classmethod
216 221
     def _is_roundcube_quote(
@@ -220,7 +225,7 @@ class HtmlMailQuoteChecker(HtmlChecker):
220 225
         return cls._has_attr_value(
221 226
             elem,
222 227
             'id',
223
-            ProprietaryHTMLProperties.Roundcube_quote_prefix_class)
228
+            ProprietaryHTMLAttrValues.Roundcube_quote_prefix_class)
224 229
 
225 230
 
226 231
 class HtmlMailSignatureChecker(HtmlChecker):
@@ -242,7 +247,7 @@ class HtmlMailSignatureChecker(HtmlChecker):
242 247
         return cls._has_attr_value(
243 248
             elem,
244 249
             'class',
245
-            ProprietaryHTMLProperties.Thunderbird_signature_class)
250
+            ProprietaryHTMLAttrValues.Thunderbird_signature_class)
246 251
 
247 252
     @classmethod
248 253
     def _is_gmail_signature(
@@ -252,24 +257,24 @@ class HtmlMailSignatureChecker(HtmlChecker):
252 257
         if cls._has_attr_value(
253 258
                 elem,
254 259
                 'class',
255
-                ProprietaryHTMLProperties.Gmail_signature_class):
260
+                ProprietaryHTMLAttrValues.Gmail_signature_class):
256 261
             return True
257 262
         if cls._has_attr_value(
258 263
                 elem,
259 264
                 'class',
260
-                ProprietaryHTMLProperties.Gmail_extras_class):
265
+                ProprietaryHTMLAttrValues.Gmail_extras_class):
261 266
             for child in elem.children:
262 267
                 if cls._has_attr_value(
263 268
                         child,
264 269
                         'class',
265
-                        ProprietaryHTMLProperties.Gmail_signature_class):
270
+                        ProprietaryHTMLAttrValues.Gmail_signature_class):
266 271
                     return True
267 272
         if isinstance(elem, Tag) and elem.name.lower() == 'div':
268 273
             for child in elem.children:
269 274
                 if cls._has_attr_value(
270 275
                         child,
271 276
                         'class',
272
-                        ProprietaryHTMLProperties.Gmail_signature_class):
277
+                        ProprietaryHTMLAttrValues.Gmail_signature_class):
273 278
                     return True
274 279
         return False
275 280
 
@@ -281,20 +286,27 @@ class HtmlMailSignatureChecker(HtmlChecker):
281 286
         if cls._has_attr_value(
282 287
                 elem,
283 288
                 'id',
284
-                ProprietaryHTMLProperties.Outlook_com_signature_id):
289
+                ProprietaryHTMLAttrValues.Outlook_com_signature_id):
285 290
             return True
286 291
         return False
287 292
 
293
+# ParsedHTMLMail #
294
+
288 295
 
289 296
 class PreSanitizeConfig(object):
297
+    """
298
+    To avoid problems, the HTML needs to be sanitized a bit during parsing
299
+    in order to distinguish Main, Quote and Signature elements.
300
+    """
290 301
     Ignored_tags = ['br', 'hr', 'script', 'style']
291
-    meta_tag = ['body','div']
302
+    meta_tag = ['body', 'div']
303
+
292 304
 
293 305
 class ParsedHTMLMail(object):
294 306
     """
295 307
     Parse HTML Mail depending of some rules.
296 308
     Distinct part of html mail body using BodyMailParts object and
297
-    process different rules.
309
+    process different rules using HtmlChecker(s).
298 310
     """
299 311
 
300 312
     def __init__(self, html_body: str):
@@ -334,7 +346,7 @@ class ParsedHTMLMail(object):
334 346
             # if Text -> Signature -> Quote Mail
335 347
             # Text and signature are wrapped into divtagdefaultwrapper
336 348
             if tag.attrs.get('id'):
337
-                if ProprietaryHTMLProperties.Outlook_com_wrapper_id\
349
+                if ProprietaryHTMLAttrValues.Outlook_com_wrapper_id\
338 350
                         in tag.attrs['id']:
339 351
                     tag.unwrap()
340 352
         return tree