Browse Source

Email fetcher for new mail

Guénaël Muller 7 years ago
parent
commit
24cb433328
2 changed files with 264 additions and 0 deletions
  1. 170 0
      tracim/tracim/lib/email_fetcher.py
  2. 94 0
      tracim/tracim/lib/test_email_fetcher.py

+ 170 - 0
tracim/tracim/lib/email_fetcher.py View File

@@ -0,0 +1,170 @@
1
+from email.message import Message
2
+from typing import Union
3
+import sys
4
+import time
5
+import imaplib
6
+import email
7
+import email.header
8
+from email.header import Header, decode_header, make_header
9
+import datetime
10
+
11
+TRACIM_SPECIAL_KEY_HEADER="X-Tracim-Key"
12
+
13
def str_header(header: Header):
    """
    Turn a raw header value into a plain, readable string.

    RFC 2047 encoded words (``=?charset?q?...?=``) are decoded; plain
    values come back unchanged.

    :param header: header value as read from a Message
    :return: decoded header text
    """
    decoded_chunks = decode_header(header)
    rebuilt_header = make_header(decoded_chunks)
    return str(rebuilt_header)
15
+
16
def decode_mail(msg: Message) -> Union[dict, None]:
    """
    Extract and decode the useful headers and the plain-text body of a mail.

    Mandatory headers are Subject, Message-ID, From, To and Date. The
    References header, the special tracim key header and the body are only
    added to the result when the mail provides them.

    :param msg: parsed mail
    :return: dict with the keys 'subject', 'msg_id', 'from', 'to', 'date'
        (naive datetime in local time) and, when available, 'references',
        the TRACIM_SPECIAL_KEY_HEADER entry and 'body'; None when a
        mandatory header is missing or cannot be decoded
    """
    # Imported here so the function does not depend on ``email.utils``
    # having been loaded as a side effect of another import.
    from email.utils import mktime_tz, parsedate_tz

    mailData = {}

    try:
        mailData['subject'] = str_header(msg['subject'])
        mailData['msg_id'] = str_header(msg['Message-ID'])
        mailData['from'] = str_header(msg['From'])
        # Reply key
        mailData['to'] = str_header(msg['To'])
        # A mail that is not a reply carries no References header; it must
        # not be rejected as malformed for that reason alone.
        if 'References' in msg:
            mailData['references'] = str_header(msg['References'])
        if TRACIM_SPECIAL_KEY_HEADER in msg:
            mailData[TRACIM_SPECIAL_KEY_HEADER] = str_header(
                msg[TRACIM_SPECIAL_KEY_HEADER]
            )
        # date
        date_tuple = parsedate_tz(str_header(msg['Date']))
        mailData['date'] = datetime.datetime.fromtimestamp(
            mktime_tz(date_tuple)
        )
    except Exception:
        # Deliberately broad: any failure here (missing mandatory header,
        # undecodable header, unparsable date) means the mail is not
        # correctly formatted.
        # TODO: exception -> mail not correctly formatted
        return None

    # TODO : msg.get_body look like the best way to get body but it's a
    # new feature now (08112017).
    for part in msg.walk():
        if part.get_content_type() != "text/plain":
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        # TODO: check if decoding is working correctly
        charset = part.get_content_charset('iso-8859-1')
        try:
            mailData['body'] = payload.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # Unknown or wrong charset declared by the sender: iso-8859-1
            # maps every byte, so this fallback cannot fail.
            mailData['body'] = payload.decode('iso-8859-1')
        # only the first text/plain part is used
        break
    return mailData
54
+
55
def get_tracim_content_key(mailData: dict) -> Union[str, None]:
    """
    Find the tracim content key a decoded mail refers to.

    Sources are tried in this order, the first one giving a key wins:
    the special tracim header, then the 'to' address, then the first
    mail-id listed in the 'references' header.

    :param mailData: decoded mail data
    :return: the key, or None when no source provides one
    """
    if TRACIM_SPECIAL_KEY_HEADER in mailData:
        special_key = mailData[TRACIM_SPECIAL_KEY_HEADER]
        if special_key is not None:
            return special_key

    if 'to' in mailData:
        to_key = find_key_from_mail_adress(mailData['to'])
        if to_key is not None:
            return to_key

    if 'references' in mailData:
        # keep only what precedes the first '>' and drop the '<' markers
        first_reference = mailData['references'].partition('>')[0]
        return find_key_from_mail_adress(first_reference.replace('<', ''))

    return None
70
+
71
+
72
def find_key_from_mail_adress(mail_address: str) -> Union[str, None]:
    """
    Parse a mail-address-like string to retrieve its key.

    The key is the part after the single '+' of the local part
    (``user+key@something``). A display name and angle brackets around
    the address (``Name <user+key@something>``) are ignored.

    :param mail_address: user+key@something like string
    :return: the key, or None when the local part does not have exactly
        one '+' or when the key is empty
    """
    from email.utils import parseaddr

    # Strip display name / angle brackets so that a '+' in the display
    # name cannot be mistaken for the key separator.
    bare_address = parseaddr(mail_address)[1]
    # parseaddr gives '' when it cannot parse the input: use it as is.
    candidate = bare_address or mail_address

    local_part = candidate.split('@')[0]
    pieces = local_part.split('+')
    # An empty key ("user+@host") is reported as None so callers testing
    # ``key is None`` can still fall back to another source.
    if len(pieces) == 2 and pieces[1]:
        return pieces[1]
    return None
86
+
87
+
88
class MailFetcher(object):
    """
    Periodically poll an IMAP mailbox over SSL for unseen mails.

    Each polling cycle connects, fetches the unseen mails of ``folder``,
    processes them and disconnects again.
    """

    def __init__(self, host, port, user, password, folder, delay):
        """
        :param host: IMAP server host
        :param port: IMAP server port (SSL)
        :param user: login name
        :param password: login password
        :param folder: mailbox to select
        :param delay: value given to time.sleep() before each polling cycle
        """
        self._connection = None
        self._mails = []

        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.folder = folder
        self.delay = delay

        self._is_active = True

    def run(self):
        """Poll the mailbox until stop() is called."""
        while self._is_active:
            time.sleep(self.delay)
            if not self._connect():
                continue
            try:
                self._fetch()
                self._notify_tracim()
            finally:
                # Always release the connection, otherwise the next cycle
                # would find a stale one.
                self._disconnect()

    def stop(self):
        """Ask run() to leave its loop after the current cycle."""
        self._is_active = False

    def _log_debug(self, message):
        """Emit a debug message through the standard logging module."""
        # Local import: the module does not import logging at top level.
        import logging
        logging.getLogger(__name__).debug(message)

    def _connect(self):
        """
        Open and authenticate the IMAP connection.

        :return: True when an authenticated connection is available,
            False when connecting or logging in failed
        """
        if self._connection:
            return True

        # TODO: Support unencrypted connection ?
        # TODO: Support keyfile,certfile ?
        connection = None
        try:
            connection = imaplib.IMAP4_SSL(self.host, self.port)
            connection.login(self.user, self.password)
        except (imaplib.IMAP4.error, OSError) as exc:
            log = 'IMAP login error: {}'
            self._log_debug(log.format(exc))
            if connection is not None:
                # connected but not authenticated: close the socket
                try:
                    connection.shutdown()
                except OSError:
                    # best-effort cleanup, the connection is dropped anyway
                    pass
            return False

        self._connection = connection
        return True

    def _disconnect(self):
        """Log out and forget the current connection, if any."""
        # Reset first so a failing logout cannot leave a dead connection
        # behind, which would prevent any later reconnection.
        connection, self._connection = self._connection, None
        if not connection:
            return
        try:
            connection.logout()
        except (imaplib.IMAP4.error, OSError) as exc:
            log = 'IMAP logout error: {}'
            self._log_debug(log.format(exc))

    def _fetch(self):
        """
        Get new messages from the mailbox and queue them in self._mails.
        """
        # select mailbox
        rv, data = self._connection.select(self.folder)
        if rv != 'OK':
            # TODO : Check best debug value
            log = 'IMAP : Unable to open mailbox : {}'
            self._log_debug(log.format(str(rv)))
            return

        # get mails
        # TODO: search only new mail or drop/moved the added one ?
        rv, data = self._connection.search(None, "(UNSEEN)")
        if rv != 'OK':
            # TODO : Distinct error from empty mailbox ?
            return

        # imaplib answers [None] when the server sent no search result
        mail_numbers = (data[0] or b'').split() if data else []
        for num in mail_numbers:
            # get mail content
            rv, fetched = self._connection.fetch(num, '(RFC822)')
            has_content = bool(fetched) and isinstance(fetched[0], tuple)
            if rv == 'OK' and has_content:
                self._mails.append(email.message_from_bytes(fetched[0][1]))
            else:
                # TODO : Check best debug value
                log = 'IMAP : Unable to get mail : {}'
                self._log_debug(log.format(str(rv)))

    def _notify_tracim(self):
        """Decode every queued mail and look up its tracim content key."""
        while self._mails:
            mail = self._mails.pop()
            decoded_mail = decode_mail(mail)
            if decoded_mail is None:
                # decode_mail gives None for incorrectly formatted mails
                self._log_debug('IMAP : Unable to decode mail')
                continue
            key = get_tracim_content_key(decoded_mail)
            # TODO: nothing is done with decoded_mail / key yet

+ 94 - 0
tracim/tracim/lib/test_email_fetcher.py View File

@@ -0,0 +1,94 @@
1
+from email_fetcher import decode_mail, get_tracim_content_key,TRACIM_SPECIAL_KEY_HEADER,find_key_from_mail_adress
2
+from email.mime.multipart import MIMEMultipart
3
+from email.utils import parsedate_tz,mktime_tz
4
+import datetime
5
+
6
+# decode_mail
7
+
8
def test_decode_mail_ok():
    """decode_mail returns every decoded header of a well-formed mail."""
    raw_date = 'Wed, 8 Nov 2017 15:21:10 +0100'

    mail = MIMEMultipart()
    mail['From'] = 'a@home'
    mail['To'] = 'b@home'
    mail['Subject'] = "test"
    mail.add_header('References', '<reply+key@home>')
    mail.add_header('Message-ID', '<uniquevalue@home>')
    mail.add_header('Date', raw_date)
    mail.add_header(TRACIM_SPECIAL_KEY_HEADER, 'key')

    decoded = decode_mail(mail)

    # same format for date: timestamp converted to a naive datetime
    expected_date = datetime.datetime.fromtimestamp(
        mktime_tz(parsedate_tz(raw_date))
    )
    expected = {
        TRACIM_SPECIAL_KEY_HEADER: 'key',
        'from': 'a@home',
        'to': 'b@home',
        'subject': 'test',
        'references': '<reply+key@home>',
        'msg_id': '<uniquevalue@home>',
        'date': expected_date,
    }
    assert decoded == expected
32
+# get_tracim_content_key
33
+
34
def test_get_tracim_content_key_empty():
    """No key can be found in empty mail data."""
    mail_data = {}
    # identity check: None is a singleton (PEP 8)
    assert get_tracim_content_key(mail_data) is None
37
+
38
def test_get_tracim_content_key_no_key():
    """Addresses without a '+key' part give no key."""
    mail_data = {
        'to': 'a@b',
        'references': '<a@b> <b@c>',
    }
    # identity check: None is a singleton (PEP 8)
    assert get_tracim_content_key(mail_data) is None
44
+
45
def test_get_tracim_content_key_special_key():
    """The special tracim header provides the key."""
    found = get_tracim_content_key({
        'to': 'a@b',
        'references': '<a@b> <b@c>',
        TRACIM_SPECIAL_KEY_HEADER: 'key',
    })
    assert found == 'key'
52
+
53
+
54
def test_get_tracim_content_key_to_key():
    """The key is read from the 'to' address when it carries one."""
    found = get_tracim_content_key({
        'to': 'a+key@b',
        'references': '<a@b> <b@c>',
    })
    assert found == 'key'
60
+
61
def test_get_tracim_content_key_references_key():
    """The key is read from the first mail-id of 'references'."""
    found = get_tracim_content_key({
        'to': 'a@b',
        'references': '<a+key@b> <b@c>',
    })
    assert found == 'key'
67
+
68
def test_get_tracim_content_key_order():
    """Sources are tried in order: special header, 'to', 'references'."""
    references = '<a+3@b> <b@c>'

    # all three sources present: the special header wins
    with_all_sources = {
        'to': 'a+2@b',
        'references': references,
        TRACIM_SPECIAL_KEY_HEADER: '1',
    }
    assert get_tracim_content_key(with_all_sources) == '1'

    # no special header: 'to' wins over 'references'
    without_special_header = {
        'to': 'a+2@b',
        'references': references,
    }
    assert get_tracim_content_key(without_special_header) == '2'

    # only 'references' left
    only_references = {
        'references': references,
    }
    assert get_tracim_content_key(only_references) == '3'
85
+
86
+# find_key_from_mail_address
87
+
88
def test_find_key_from_mail_address_no_key():
    """An address without '+' in its local part has no key."""
    mail_address = "a@b"
    # identity check: None is a singleton (PEP 8)
    assert find_key_from_mail_adress(mail_address) is None
91
+
92
def test_find_key_from_mail_adress_key():
    """The part after '+' in the local part is the key."""
    found = find_key_from_mail_adress("a+key@b")
    assert found == 'key'