|
@@ -1,3 +1,4 @@
|
|
1
|
+import typing
|
1
|
2
|
from bs4 import BeautifulSoup, Tag
|
2
|
3
|
from tracim.lib.email_processing.sanitizer_config.attrs_whitelist import \
|
3
|
4
|
ATTRS_WHITELIST
|
|
@@ -31,7 +32,7 @@ class HtmlSanitizer(object):
|
31
|
32
|
"""
|
32
|
33
|
|
33
|
34
|
@classmethod
|
34
|
|
- def sanitize(cls, html_body: str) -> str:
|
|
35
|
+ def sanitize(cls, html_body: str) -> typing.Optional[str]:
|
35
|
36
|
soup = BeautifulSoup(html_body, 'html.parser')
|
36
|
37
|
for tag in soup.findAll():
|
37
|
38
|
if cls._tag_to_extract(tag):
|
|
@@ -43,7 +44,17 @@ class HtmlSanitizer(object):
|
43
|
44
|
del tag.attrs[attr]
|
44
|
45
|
else:
|
45
|
46
|
tag.unwrap()
|
46
|
|
- return str(soup)
|
|
47
|
+
|
|
48
|
+ if cls._is_content_empty(soup):
|
|
49
|
+ return None
|
|
50
|
+ else:
|
|
51
|
+ return str(soup)
|
|
52
|
+
|
|
53
|
+ @classmethod
|
|
54
|
+ def _is_content_empty(cls, soup):
|
|
55
|
+ img = soup.find('img')
|
|
56
|
+ txt = soup.get_text().replace('\n', '').strip()
|
|
57
|
+ return (not img and not txt)
|
47
|
58
|
|
48
|
59
|
@classmethod
|
49
|
60
|
def _tag_to_extract(cls, tag: Tag) -> bool:
|