Browse Source

refactoring sanitizer_config

Guénaël Muller 7 years ago
parent
commit
584b1a946c

+ 17 - 36
tracim/tracim/lib/email_processing/sanitizer.py View File

@@ -1,42 +1,23 @@
1 1
 from bs4 import BeautifulSoup, Tag
2
-
2
+from tracim.lib.email_processing.sanitizer_config.attrs_whitelist import \
3
+    ATTRS_WHITELIST
4
+from tracim.lib.email_processing.sanitizer_config.class_blacklist import \
5
+    CLASS_BLACKLIST
6
+from tracim.lib.email_processing.sanitizer_config.id_blacklist import \
7
+    ID_BLACKLIST
8
+from tracim.lib.email_processing.sanitizer_config.tag_blacklist import \
9
+    TAG_BLACKLIST
10
+from tracim.lib.email_processing.sanitizer_config.tag_whitelist import \
11
+    TAG_WHITELIST
3 12
 
4 13
 class HtmlSanitizerConfig(object):
5
-    # some Default_html_tags type
6
-    HTML_Heading_tag = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
7
-    HTML_Text_parts_tag = ['p',
8
-                           'br', 'hr',
9
-                           'pre', 'code', 'samp',  # preformatted content
10
-                           'q', 'blockquote',  # quotes
11
-                           ]
12
-    HTML_Text_format_tag = ['b', 'i', 'u', 'small', 'sub', 'sup', ]
13
-    HTML_Text_semantic_tag = ['strong', 'em',
14
-                              'mark', 'cite', 'dfn',
15
-                              'del', 'ins', ]
16
-    HTML_Table_tag = ['table',
17
-                      'thead', 'tfoot', 'tbody',
18
-                      'tr', 'td', 'caption', ]
19
-
20
-    HTML_List_tag = ['ul', 'li', 'ol',  # simple list
21
-                     'dl', 'dt', 'dd', ]  # definition list
22
-
23
-    # Rules
24
-    Tag_whitelist = HTML_Heading_tag \
25
-                    + HTML_Text_parts_tag \
26
-                    + HTML_Text_format_tag \
27
-                    + HTML_Text_semantic_tag \
28
-                    + HTML_Table_tag \
29
-                    + HTML_List_tag
30
-
31
-    Tag_blacklist = ['script', 'style']
32
-
33
-    # TODO - G.M - 2017-12-01 - Think about removing class/id Blacklist
34
-    # These elements are no longer required.
35
-    Class_blacklist = []
36
-    Id_blacklist = []
37
-
38
-    Attrs_whitelist = ['href']
39
-
14
+    # whitelist : keep tag and content
15
+    Tag_whitelist = TAG_WHITELIST
16
+    Attrs_whitelist = ATTRS_WHITELIST
17
+    # blacklist : remove content
18
+    Tag_blacklist = TAG_BLACKLIST
19
+    Class_blacklist = CLASS_BLACKLIST
20
+    Id_blacklist = ID_BLACKLIST
40 21
 
41 22
 class HtmlSanitizer(object):
42 23
     """

+ 1 - 0
tracim/tracim/lib/email_processing/sanitizer_config/attrs_whitelist.py View File

@@ -0,0 +1 @@
1
+ATTRS_WHITELIST = ['href']

+ 1 - 0
tracim/tracim/lib/email_processing/sanitizer_config/class_blacklist.py View File

@@ -0,0 +1 @@
1
+CLASS_BLACKLIST =  []

+ 1 - 0
tracim/tracim/lib/email_processing/sanitizer_config/id_blacklist.py View File

@@ -0,0 +1 @@
1
+ID_BLACKLIST = []

+ 1 - 0
tracim/tracim/lib/email_processing/sanitizer_config/tag_blacklist.py View File

@@ -0,0 +1 @@
1
+TAG_BLACKLIST = ['script', 'style']

+ 16 - 0
tracim/tracim/lib/email_processing/sanitizer_config/tag_whitelist.py View File

@@ -0,0 +1,16 @@
1
+TAG_WHITELIST = [
2
+    'b', 'blockquote', 'br',
3
+    'caption', 'cite', 'code',
4
+    'dd', 'del', 'dfn', 'dl', 'dt',
5
+    'em',
6
+    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr',
7
+    'i', 'img', 'ins',
8
+    'li',
9
+    'mark',
10
+    'ol',
11
+    'p', 'pre',
12
+    'q',
13
+    'samp', 'small', 'strong', 'sub', 'sup',
14
+    'table', 'tbody', 'td', 'tfoot', 'thead', 'tr',
15
+    'u', 'ul'
16
+]