Browse Source

better way to get mail body, prefer html over plaintext

Guénaël Muller 7 years ago
parent
commit
f638b74485
1 changed file with 34 additions and 11 deletions
  1. 34 11
      tracim/tracim/lib/email_fetcher.py

+ 34 - 11
tracim/tracim/lib/email_fetcher.py View File

@@ -53,21 +53,44 @@ def decode_mail(msg: Message)-> dict:
53 53
     except Exception:
54 54
         # FIXME - G.M - 2017-11-15 - handle exceptions correctly
55 55
         return {}
56
-    # FIXME - G.M - 2017-11-15 - get the best body candidate in MIME
57
-    # msg.get_body() look like the best way to get body but it's a py3.6 feature
58
-    for part in msg.walk():
59
-        # TODO - G.M - 2017-11-15 - Handle HTML mail body
60
-        # TODO - G.M - 2017-11-15 - Parse properly HTML (and text ?) body
61
-        if not part.get_content_type() == "text/plain":
62
-            continue
56
+
57
+    # TODO - G.M - 2017-11-15 - Properly parse the HTML (and text?) body
58
+    body = get_body_mime_part(msg)
59
+    if body:
60
+        charset = body.get_content_charset('iso-8859-1')
61
+        ctype = body.get_content_type()
62
+        if ctype == "text/plain":
63
+            mail_data['body'] = body.get_payload(decode=True).decode(charset)
64
+        elif ctype == "text/html":
65
+            mail_data['body'] = body.get_payload(decode=True).decode(charset)
63 66
         else:
64
-            # FIXME: check if decoding is working correctly
65
-            charset = part.get_content_charset('iso-8859-1')
66
-            mail_data['body'] = part.get_payload(decode=True).decode(charset)
67
-            break
67
+            pass
68
+    else:
69
+        pass
70
+
68 71
     return mail_data
69 72
 
70 73
 
74
+def get_body_mime_part(msg) -> Message:
75
+    # FIXME - G.M - 2017-11-16 - Use stdlib msg.get_body feature for py3.6+
76
+    # FIXME - G.M - 2017-11-16 - Check support for non-multipart mail
77
+    #assert msg.is_multipart()
78
+    part = None
79
+    # Check for html
80
+    for part in msg.walk():
81
+        ctype = part.get_content_type()
82
+        cdispo = str(part.get('Content-Disposition'))
83
+        if ctype == 'text/html' and 'attachment' not in cdispo:
84
+            return part
85
+    # check for plain text
86
+    for part in msg.walk():
87
+        ctype = part.get_content_type()
88
+        cdispo = str(part.get('Content-Disposition'))
89
+        if ctype == 'text/plain' and 'attachment' not in cdispo:
90
+            return part
91
+    return part
92
+
93
+
71 94
 def get_tracim_content_key(mail_data: dict) -> typing.Optional[str]:
72 95
 
73 96
     """ Link mail_data dict to tracim content