|  | @@ -8,14 +8,13 @@ from tracim.lib.email_processing.checkers import HtmlMailQuoteChecker
 | 
	
		
			
			| 8 | 8 |  from tracim.lib.email_processing.checkers import HtmlMailSignatureChecker
 | 
	
		
			
			| 9 | 9 |  from tracim.lib.email_processing.models import BodyMailPartType
 | 
	
		
			
			| 10 | 10 |  from tracim.lib.email_processing.models import BodyMailPart
 | 
	
		
			
			| 11 |  | -from tracim.lib.email_processing.models import BodyMailParts
 | 
	
		
			
			|  | 11 | +from tracim.lib.email_processing.models import HtmlBodyMailParts
 | 
	
		
			
			| 12 | 12 |  
 | 
	
		
			
			| 13 | 13 |  class PreSanitizeConfig(object):
 | 
	
		
			
			| 14 | 14 |      """
 | 
	
		
			
			| 15 |  | -    To avoid problems, html need to be a bit during parsing to distinct
 | 
	
		
			
			|  | 15 | +    To avoid problems, HTML needs to be sanitized a bit during parsing to distinguish
 | 
	
		
			
			| 16 | 16 |      Main,Quote and Signature elements
 | 
	
		
			
			| 17 | 17 |      """
 | 
	
		
			
			| 18 |  | -    Ignored_tags = ['br', 'hr', 'script', 'style']
 | 
	
		
			
			| 19 | 18 |      meta_tag = ['body', 'div']
 | 
	
		
			
			| 20 | 19 |  
 | 
	
		
			
			| 21 | 20 |  
 | 
	
	
		
			
			|  | @@ -32,11 +31,11 @@ class ParsedHTMLMail(object):
 | 
	
		
			
			| 32 | 31 |      def __str__(self):
 | 
	
		
			
			| 33 | 32 |          return str(self._parse_mail())
 | 
	
		
			
			| 34 | 33 |  
 | 
	
		
			
			| 35 |  | -    def get_elements(self) -> BodyMailParts:
 | 
	
		
			
			|  | 34 | +    def get_elements(self) -> HtmlBodyMailParts:
 | 
	
		
			
			| 36 | 35 |          tree = self._get_proper_main_body_tree()
 | 
	
		
			
			| 37 | 36 |          return self._distinct_elements(tree)
 | 
	
		
			
			| 38 | 37 |  
 | 
	
		
			
			| 39 |  | -    def _parse_mail(self) -> BodyMailParts:
 | 
	
		
			
			|  | 38 | +    def _parse_mail(self) -> HtmlBodyMailParts:
 | 
	
		
			
			| 40 | 39 |          elements = self.get_elements()
 | 
	
		
			
			| 41 | 40 |          elements = self._process_elements(elements)
 | 
	
		
			
			| 42 | 41 |          return elements
 | 
	
	
		
			
			|  | @@ -69,26 +68,16 @@ class ParsedHTMLMail(object):
 | 
	
		
			
			| 69 | 68 |          return tree
 | 
	
		
			
			| 70 | 69 |  
 | 
	
		
			
			| 71 | 70 |      @classmethod
 | 
	
		
			
			| 72 |  | -    def _distinct_elements(cls, tree: BeautifulSoup) -> BodyMailParts:
 | 
	
		
			
			| 73 |  | -        parts = BodyMailParts()
 | 
	
		
			
			|  | 71 | +    def _distinct_elements(cls, tree: BeautifulSoup) -> HtmlBodyMailParts:
 | 
	
		
			
			|  | 72 | +        parts = HtmlBodyMailParts()
 | 
	
		
			
			| 74 | 73 |          for elem in list(tree):
 | 
	
		
			
			| 75 | 74 |              part_txt = str(elem)
 | 
	
		
			
			| 76 | 75 |              part_type = BodyMailPartType.Main
 | 
	
		
			
			| 77 |  | -            # sanitize NavigableString
 | 
	
		
			
			| 78 |  | -            if isinstance(elem, NavigableString):
 | 
	
		
			
			| 79 |  | -                part_txt = part_txt.replace('\n', '').strip()
 | 
	
		
			
			| 80 | 76 |  
 | 
	
		
			
			| 81 | 77 |              if HtmlMailQuoteChecker.is_quote(elem):
 | 
	
		
			
			| 82 | 78 |                  part_type = BodyMailPartType.Quote
 | 
	
		
			
			| 83 | 79 |              elif HtmlMailSignatureChecker.is_signature(elem):
 | 
	
		
			
			| 84 | 80 |                  part_type = BodyMailPartType.Signature
 | 
	
		
			
			| 85 |  | -            else:
 | 
	
		
			
			| 86 |  | -                # INFO - G.M -2017-11-28 - ignore unwanted parts
 | 
	
		
			
			| 87 |  | -                if not part_txt:
 | 
	
		
			
			| 88 |  | -                    continue
 | 
	
		
			
			| 89 |  | -                if isinstance(elem, Tag) \
 | 
	
		
			
			| 90 |  | -                        and elem.name.lower() in PreSanitizeConfig.Ignored_tags:
 | 
	
		
			
			| 91 |  | -                    continue
 | 
	
		
			
			| 92 | 81 |  
 | 
	
		
			
			| 93 | 82 |              part = BodyMailPart(part_txt, part_type)
 | 
	
		
			
			| 94 | 83 |              parts.append(part)
 | 
	
	
		
			
			|  | @@ -99,7 +88,7 @@ class ParsedHTMLMail(object):
 | 
	
		
			
			| 99 | 88 |          return parts
 | 
	
		
			
			| 100 | 89 |  
 | 
	
		
			
			| 101 | 90 |      @classmethod
 | 
	
		
			
			| 102 |  | -    def _process_elements(cls, elements: BodyMailParts) -> BodyMailParts:
 | 
	
		
			
			|  | 91 | +    def _process_elements(cls, elements: HtmlBodyMailParts) -> HtmlBodyMailParts:
 | 
	
		
			
			| 103 | 92 |          if len(elements) >= 2:
 | 
	
		
			
			| 104 | 93 |              # Case 1 and 2, only one main and one quote
 | 
	
		
			
			| 105 | 94 |              if elements.get_nb_part_type('main') == 1 and \
 | 
	
	
		
			
			|  | @@ -119,19 +108,19 @@ class ParsedHTMLMail(object):
 | 
	
		
			
			| 119 | 108 |          return elements
 | 
	
		
			
			| 120 | 109 |  
 | 
	
		
			
			| 121 | 110 |      @classmethod
 | 
	
		
			
			| 122 |  | -    def _process_quote_first_case(cls, elements: BodyMailParts) -> None:
 | 
	
		
			
			|  | 111 | +    def _process_quote_first_case(cls, elements: HtmlBodyMailParts) -> None:
 | 
	
		
			
			| 123 | 112 |          elements.drop_part_type(BodyMailPartType.Signature)
 | 
	
		
			
			| 124 | 113 |  
 | 
	
		
			
			| 125 | 114 |      @classmethod
 | 
	
		
			
			| 126 |  | -    def _process_main_first_case(cls, elements: BodyMailParts) -> None:
 | 
	
		
			
			|  | 115 | +    def _process_main_first_case(cls, elements: HtmlBodyMailParts) -> None:
 | 
	
		
			
			| 127 | 116 |          elements.drop_part_type(BodyMailPartType.Quote)
 | 
	
		
			
			| 128 | 117 |          elements.drop_part_type(BodyMailPartType.Signature)
 | 
	
		
			
			| 129 | 118 |  
 | 
	
		
			
			| 130 | 119 |      @classmethod
 | 
	
		
			
			| 131 |  | -    def _process_multiples_elems_case(cls, elements: BodyMailParts) -> None:
 | 
	
		
			
			|  | 120 | +    def _process_multiples_elems_case(cls, elements: HtmlBodyMailParts) -> None:
 | 
	
		
			
			| 132 | 121 |          elements.drop_part_type(BodyMailPartType.Signature)
 | 
	
		
			
			| 133 | 122 |  
 | 
	
		
			
			| 134 | 123 |      @classmethod
 | 
	
		
			
			| 135 |  | -    def _process_default_case(cls, elements: BodyMailParts) -> None:
 | 
	
		
			
			|  | 124 | +    def _process_default_case(cls, elements: HtmlBodyMailParts) -> None:
 | 
	
		
			
			| 136 | 125 |          elements.drop_part_type(BodyMailPartType.Quote)
 | 
	
		
			
			| 137 | 126 |          elements.drop_part_type(BodyMailPartType.Signature)
 |