Bladeren bron

Merge branch 'master' of https://github.com/tracim/tracim into fix/532/webdav_apache_documentation

philippe 6 jaren geleden
bovenliggende
commit
33c4ac4962

+ 31 - 0
doc/devtools.md Bestand weergeven

@@ -0,0 +1,31 @@
1
+# Devtools
2
+
3
+## Check third party licences
4
+
5
+Install `yolk3k` pip package:
6
+
7
+    pip install yolk3k
8
+
9
+Then execute the command:
10
+
11
+    yolk -l -f license
12
+
13
+Output will look like:
14
+
15
+```
16
+Babel (2.2.0)
17
+    License: BSD
18
+
19
+Beaker (1.6.4)
20
+    License: BSD
21
+
22
+CherryPy (3.6.0)
23
+    License: BSD
24
+
25
+FormEncode (1.3.0a1)
26
+    License: PSF
27
+
28
+Genshi (0.7)
29
+    License: BSD
30
+...
31
+```

+ 2 - 2
gulpfile.js Bestand weergeven

@@ -21,8 +21,8 @@ const _srcdir = 'tracim/tracim/public/assets/'
21 21
 const _tpldir = 'tracim/tracim/templates/'
22 22
 
23 23
 const listCssFiles = [
24
-  _srcdir + 'css/default_skin.css',
25
-  _srcdir + 'css/bootstrap.css',
24
+  // _srcdir + 'css/default_skin.css',
25
+  // _srcdir + 'css/bootstrap.css', // Côme - 2018/01/11 - removed to allow easier configuration of theme builder
26 26
   _srcdir + 'font-awesome-4.2.0/css/font-awesome.css',
27 27
   _srcdir + 'select2-4.0.3/css/select2.min.css',
28 28
   _srcdir + 'css/dashboard.css'

+ 1 - 0
install/requirements.txt Bestand weergeven

@@ -68,3 +68,4 @@ click==6.7
68 68
 markdown==2.6.9
69 69
 email_reply_parser==0.5.9
70 70
 filelock==2.0.13
71
+imapclient==1.1.0

+ 3 - 0
tracim/development.ini.base Bestand weergeven

@@ -222,6 +222,9 @@ email.reply.imap.user = your_imap_user
222 222
 email.reply.imap.password = your_imap_password
223 223
 email.reply.imap.folder = INBOX
224 224
 email.reply.imap.use_ssl = true
225
+email.reply.imap.use_idle = true
226
+# Renew the connection every 10 minutes
227
+email.reply.connection.max_lifetime = 600
225 228
 # Token for communication between mail fetcher and tracim controller
226 229
 email.reply.token = mysecuretoken
227 230
 # Delay in seconds between each check

+ 8 - 0
tracim/tracim/config/app_cfg.py Bestand weergeven

@@ -384,6 +384,14 @@ class CFG(object):
384 384
         self.EMAIL_REPLY_IMAP_USE_SSL = asbool(tg.config.get(
385 385
             'email.reply.imap.use_ssl',
386 386
         ))
387
+        self.EMAIL_REPLY_IMAP_USE_IDLE = asbool(tg.config.get(
388
+            'email.reply.imap.use_idle',
389
+            True,
390
+        ))
391
+        self.EMAIL_REPLY_CONNECTION_MAX_LIFETIME = int(tg.config.get(
392
+            'email.reply.connection.max_lifetime',
393
+            600, # 10 minutes
394
+        ))
387 395
         self.EMAIL_REPLY_USE_HTML_PARSING = asbool(tg.config.get(
388 396
             'email.reply.use_html_parsing',
389 397
             True,

+ 3 - 1
tracim/tracim/lib/daemons.py Bestand weergeven

@@ -173,7 +173,9 @@ class MailFetcherDaemon(Daemon):
173 173
             password=cfg.EMAIL_REPLY_IMAP_PASSWORD,
174 174
             use_ssl=cfg.EMAIL_REPLY_IMAP_USE_SSL,
175 175
             folder=cfg.EMAIL_REPLY_IMAP_FOLDER,
176
-            delay=cfg.EMAIL_REPLY_CHECK_HEARTBEAT,
176
+            heartbeat=cfg.EMAIL_REPLY_CHECK_HEARTBEAT,
177
+            use_idle=cfg.EMAIL_REPLY_IMAP_USE_IDLE,
178
+            connection_max_lifetime=cfg.EMAIL_REPLY_CONNECTION_MAX_LIFETIME,
177 179
             # FIXME - G.M - 2017-11-15 - proper tracim url formatting
178 180
             endpoint=cfg.WEBSITE_BASE_URL + "/events",
179 181
             token=cfg.EMAIL_REPLY_TOKEN,

+ 200 - 168
tracim/tracim/lib/email_fetcher.py Bestand weergeven

@@ -1,9 +1,11 @@
1 1
 # -*- coding: utf-8 -*-
2 2
 
3 3
 import time
4
-import imaplib
5 4
 import json
6 5
 import typing
6
+import socket
7
+import ssl
8
+
7 9
 from email import message_from_bytes
8 10
 from email.header import decode_header
9 11
 from email.header import make_header
@@ -13,6 +15,8 @@ from email.utils import parseaddr
13 15
 import filelock
14 16
 import markdown
15 17
 import requests
18
+import imapclient
19
+
16 20
 from email_reply_parser import EmailReplyParser
17 21
 from tracim.lib.base import logger
18 22
 from tracim.lib.email_processing.parser import ParsedHTMLMail
@@ -22,9 +26,13 @@ TRACIM_SPECIAL_KEY_HEADER = 'X-Tracim-Key'
22 26
 CONTENT_TYPE_TEXT_PLAIN = 'text/plain'
23 27
 CONTENT_TYPE_TEXT_HTML = 'text/html'
24 28
 
25
-IMAP_SEEN_FLAG = '\\Seen'
26
-IMAP_CHECKED_FLAG = '\\Flagged'
29
+IMAP_CHECKED_FLAG = imapclient.FLAGGED
30
+IMAP_SEEN_FLAG = imapclient.SEEN
31
+
27 32
 MAIL_FETCHER_FILELOCK_TIMEOUT = 10
33
+MAIL_FETCHER_CONNECTION_TIMEOUT = 60*3
34
+MAIL_FETCHER_IDLE_RESPONSE_TIMEOUT = 60*9   # this should be not more
35
+# than 29 minutes according to RFC 2177 (servers wait 30 min by default)
28 36
 
29 37
 
30 38
 class MessageContainer(object):
@@ -144,6 +152,10 @@ class DecodedMail(object):
144 152
         return None
145 153
 
146 154
 
155
+class BadIMAPFetchResponse(Exception):
156
+    pass
157
+
158
+
147 159
 class MailFetcher(object):
148 160
     def __init__(
149 161
         self,
@@ -153,7 +165,9 @@ class MailFetcher(object):
153 165
         password: str,
154 166
         use_ssl: bool,
155 167
         folder: str,
156
-        delay: int,
168
+        use_idle: bool,
169
+        connection_max_lifetime: int,
170
+        heartbeat: int,
157 171
         endpoint: str,
158 172
         token: str,
159 173
         use_html_parsing: bool,
@@ -170,20 +184,25 @@ class MailFetcher(object):
170 184
         :param password: user password of mailbox
171 185
         :param use_ssl: use imap over ssl connection
172 186
         :param folder: mail folder where new mail are fetched
173
-        :param delay: seconds to wait before fetching new mail again
187
+        :param use_idle: use IMAP IDLE(server notification) when available
188
+        :param heartbeat: seconds to wait before fetching new mail again
189
+        :param connection_max_lifetime: maximum duration allowed for a
190
+             connection. Connections are automatically renewed when their
191
+             lifetime exceeds this duration.
174 192
         :param endpoint: tracim http endpoint where decoded mails are sent.
175 193
         :param token: token to authenticate http connexion
176 194
         :param use_html_parsing: parse html mail
177 195
         :param use_txt_parsing: parse txt mail
178 196
         """
179
-        self._connection = None
180 197
         self.host = host
181 198
         self.port = port
182 199
         self.user = user
183 200
         self.password = password
184 201
         self.use_ssl = use_ssl
185 202
         self.folder = folder
186
-        self.delay = delay
203
+        self.heartbeat = heartbeat
204
+        self.use_idle = use_idle
205
+        self.connection_max_lifetime = connection_max_lifetime
187 206
         self.endpoint = endpoint
188 207
         self.token = token
189 208
         self.use_html_parsing = use_html_parsing
@@ -194,150 +213,215 @@ class MailFetcher(object):
194 213
     def run(self) -> None:
195 214
         logger.info(self, 'Starting MailFetcher')
196 215
         while self._is_active:
197
-            logger.debug(self, 'sleep for {}'.format(self.delay))
198
-            time.sleep(self.delay)
216
+            imapc = None
217
+            sleep_after_connection = True
199 218
             try:
200
-                self._connect()
201
-                with self.lock.acquire(
202
-                        timeout=MAIL_FETCHER_FILELOCK_TIMEOUT
203
-                ):
204
-                    messages = self._fetch()
205
-                cleaned_mails = [DecodedMail(m.message, m.uid)
206
-                                 for m in messages]
207
-                self._notify_tracim(cleaned_mails)
208
-                self._disconnect()
219
+                imapc = imapclient.IMAPClient(
220
+                    self.host,
221
+                    self.port,
222
+                    ssl=self.use_ssl,
223
+                    timeout=MAIL_FETCHER_CONNECTION_TIMEOUT
224
+                )
225
+                imapc.login(self.user, self.password)
226
+
227
+                logger.debug(self, 'Select folder {}'.format(
228
+                    self.folder,
229
+                ))
230
+                imapc.select_folder(self.folder)
231
+
232
+                # force renew connection when deadline is reached
233
+                deadline = time.time() + self.connection_max_lifetime
234
+                while True:
235
+                    if not self._is_active:
236
+                        logger.warning(self, 'Mail Fetcher process aborted')
237
+                        sleep_after_connection = False
238
+                        break
239
+
240
+                    if time.time() > deadline:
241
+                        logger.debug(
242
+                            self,
243
+                            "MailFetcher Connection Lifetime limit excess"
244
+                            ", Try Re-new connection")
245
+                        sleep_after_connection = False
246
+                        break
247
+
248
+                    # check for new mails
249
+                    self._check_mail(imapc)
250
+
251
+                    if self.use_idle and imapc.has_capability('IDLE'):
252
+                        # IDLE_mode wait until event from server
253
+                        logger.debug(self, 'wail for event(IDLE)')
254
+                        imapc.idle()
255
+                        imapc.idle_check(
256
+                            timeout=MAIL_FETCHER_IDLE_RESPONSE_TIMEOUT
257
+                        )
258
+                        imapc.idle_done()
259
+                    else:
260
+                        if self.use_idle and not imapc.has_capability('IDLE'):
261
+                            log = 'IDLE mode activated but server do not' \
262
+                                  'support it, use polling instead.'
263
+                            logger.warning(self, log)
264
+                        # normal polling mode: sleep for a defined duration
265
+                        logger.debug(self,
266
+                                     'sleep for {}'.format(self.heartbeat))
267
+                        time.sleep(self.heartbeat)
268
+
269
+            # Socket
270
+            except (socket.error,
271
+                    socket.gaierror,
272
+                    socket.herror) as e:
273
+                log = 'Socket fail with IMAP connection {}'
274
+                logger.error(self, log.format(e.__str__()))
275
+
276
+            except socket.timeout as e:
277
+                log = 'Socket timeout on IMAP connection {}'
278
+                logger.error(self, log.format(e.__str__()))
279
+
280
+            # SSL
281
+            except ssl.SSLError as e:
282
+                log = 'SSL error on IMAP connection'
283
+                logger.error(self, log.format(e.__str__()))
284
+
285
+            except ssl.CertificateError as e:
286
+                log = 'SSL Certificate verification failed on IMAP connection'
287
+                logger.error(self, log.format(e.__str__()))
288
+
289
+            # Filelock
209 290
             except filelock.Timeout as e:
210 291
                 log = 'Mail Fetcher Lock Timeout {}'
211 292
                 logger.warning(self, log.format(e.__str__()))
293
+
294
+            # IMAP
295
+            # TODO - G.M - 10-01-2017 - Support imapclient exceptions
296
+            # when Imapclient stable will be 2.0+
297
+
298
+            except BadIMAPFetchResponse as e:
299
+                log = 'Imap Fetch command return bad response.' \
300
+                      'Is someone else connected to the mailbox ?: ' \
301
+                      '{}'
302
+                logger.error(self, log.format(e.__str__()))
303
+            # Others
212 304
             except Exception as e:
213
-                # TODO - G.M - 2017-11-23 - Identify possible exceptions
214
-                log = 'IMAP error: {}'
215
-                logger.warning(self, log.format(e.__str__()))
305
+                log = 'Mail Fetcher error {}'
306
+                logger.error(self, log.format(e.__str__()))
307
+
308
+            finally:
309
+                # INFO - G.M - 2018-01-09 - Connection closing
310
+                # Properly close connection according to
311
+                # https://github.com/mjs/imapclient/pull/279/commits/043e4bd0c5c775c5a08cb5f1baa93876a46732ee
312
+                # TODO : Use __exit__ method instead when imapclient stable will
313
+                # be 2.0+ .
314
+                if imapc:
315
+                    logger.debug(self, 'Try logout')
316
+                    try:
317
+                        imapc.logout()
318
+                    except Exception:
319
+                        try:
320
+                            imapc.shutdown()
321
+                        except Exception as e:
322
+                            log = "Can't logout, connection broken ? {}"
323
+                            logger.error(self, log.format(e.__str__()))
324
+
325
+            if sleep_after_connection:
326
+                logger.debug(self, 'sleep for {}'.format(self.heartbeat))
327
+                time.sleep(self.heartbeat)
328
+
329
+        log = 'Mail Fetcher stopped'
330
+        logger.debug(self, log)
331
+
332
+    def _check_mail(self, imapc: imapclient.IMAPClient) -> None:
333
+        with self.lock.acquire(
334
+                timeout=MAIL_FETCHER_FILELOCK_TIMEOUT
335
+        ):
336
+            messages = self._fetch(imapc)
337
+            cleaned_mails = [DecodedMail(m.message, m.uid)
338
+                             for m in messages]
339
+            self._notify_tracim(cleaned_mails, imapc)
216 340
 
217 341
     def stop(self) -> None:
218 342
         self._is_active = False
219 343
 
220
-    def _connect(self) -> None:
221
-        # TODO - G.M - 2017-11-15 Verify connection/disconnection
222
-        # Are old connexion properly close this way ?
223
-        if self._connection:
224
-            logger.debug(self, 'Disconnect from IMAP')
225
-            self._disconnect()
226
-        # TODO - G.M - 2017-11-23 Support for predefined SSLContext ?
227
-        # without ssl_context param, tracim use default security configuration
228
-        # which is great in most case.
229
-        if self.use_ssl:
230
-            logger.debug(self, 'Connect IMAP {}:{} using SSL'.format(
231
-                self.host,
232
-                self.port,
233
-            ))
234
-            self._connection = imaplib.IMAP4_SSL(self.host, self.port)
235
-        else:
236
-            logger.debug(self, 'Connect IMAP {}:{}'.format(
237
-                self.host,
238
-                self.port,
239
-            ))
240
-            self._connection = imaplib.IMAP4(self.host, self.port)
241
-
242
-        try:
243
-            logger.debug(self, 'Login IMAP with login {}'.format(
244
-                self.user,
245
-            ))
246
-            self._connection.login(self.user, self.password)
247
-        except Exception as e:
248
-            log = 'Error during execution: {}'
249
-            logger.error(self, log.format(e.__str__()), exc_info=1)
250
-
251
-    def _disconnect(self) -> None:
252
-        if self._connection:
253
-            self._connection.close()
254
-            self._connection.logout()
255
-            self._connection = None
256
-
257
-    def _fetch(self) -> typing.List[MessageContainer]:
344
+    def _fetch(
345
+        self, 
346
+        imapc: imapclient.IMAPClient,
347
+    ) -> typing.List[MessageContainer]:
258 348
         """
259 349
         Get new messages from mailbox
260 350
         :return: list of new mails
261 351
         """
262 352
         messages = []
263
-        # select mailbox
264
-        logger.debug(self, 'Fetch messages from folder {}'.format(
265
-            self.folder,
266
-        ))
267
-        rv, data = self._connection.select(self.folder)
268
-        logger.debug(self, 'Response status {}'.format(
269
-            rv,
353
+
354
+        logger.debug(self, 'Fetch unflagged messages')
355
+        uids = imapc.search(['UNFLAGGED'])
356
+        logger.debug(self, 'Found {} unflagged mails'.format(
357
+            len(uids),
270 358
         ))
271
-        if rv == 'OK':
272
-            # get mails
273
-            # TODO - G.M -  2017-11-15 Which files to select as new file ?
274
-            # Unseen file or All file from a directory (old one should be
275
-            #  moved/ deleted from mailbox during this process) ?
276
-            logger.debug(self, 'Fetch unseen messages')
277
-
278
-            rv, data = self._connection.search(None, "(UNSEEN)")
279
-            logger.debug(self, 'Response status {}'.format(
280
-                rv,
359
+        for msgid, data in imapc.fetch(uids, ['BODY.PEEK[]']).items():
360
+            # INFO - G.M - 2017-12-08 - Fetch BODY.PEEK[]
361
+            # Retrieve all mail(body and header) but don't set mail
362
+            # as seen because of PEEK
363
+            # see rfc3501
364
+            logger.debug(self, 'Fetch mail "{}"'.format(
365
+                msgid,
281 366
             ))
282
-            if rv == 'OK':
283
-                # get mail content
284
-                logger.debug(self, 'Found {} unseen mails'.format(
285
-                    len(data[0].split()),
286
-                ))
287
-                for uid in data[0].split():
288
-                    # INFO - G.M - 2017-12-08 - Fetch BODY.PEEK[]
289
-                    # Retrieve all mail(body and header) but don't set mail
290
-                    # as seen because of PEEK
291
-                    # see rfc3501
292
-                    logger.debug(self, 'Fetch mail "{}"'.format(
293
-                        uid,
294
-                    ))
295
-                    rv, data = self._connection.fetch(uid, 'BODY.PEEK[]')
296
-                    logger.debug(self, 'Response status {}'.format(
297
-                        rv,
298
-                    ))
299
-                    if rv == 'OK':
300
-                        msg = message_from_bytes(data[0][1])
301
-                        msg_container = MessageContainer(msg, uid)
302
-                        messages.append(msg_container)
303
-                        self._set_flag(uid, IMAP_SEEN_FLAG)
304
-                    else:
305
-                        log = 'IMAP : Unable to get mail : {}'
306
-                        logger.error(self, log.format(str(rv)))
307
-            else:
308
-                log = 'IMAP : Unable to get unseen mail : {}'
309
-                logger.error(self, log.format(str(rv)))
310
-        else:
311
-            log = 'IMAP : Unable to open mailbox : {}'
312
-            logger.error(self, log.format(str(rv)))
367
+
368
+            try:
369
+                msg = message_from_bytes(data[b'BODY[]'])
370
+            except KeyError as e:
371
+                # INFO - G.M - 12-01-2018 - Fetch may return events response
372
+                # In some specific case, fetch command may return events
373
+                # response unrelated to fetch request.
374
+                # This should happen only when someone else uses the mailbox
375
+                # at the same time as the fetcher.
376
+                # see https://github.com/mjs/imapclient/issues/334
377
+                except_msg = 'fetch response : {}'.format(str(data))
378
+                raise BadIMAPFetchResponse(except_msg) from e
379
+
380
+            msg_container = MessageContainer(msg, msgid)
381
+            messages.append(msg_container)
382
+
313 383
         return messages
314 384
 
315 385
     def _notify_tracim(
316 386
         self,
317 387
         mails: typing.List[DecodedMail],
388
+        imapc: imapclient.IMAPClient
318 389
     ) -> None:
319 390
         """
320 391
         Send http request to tracim endpoint
321 392
         :param mails: list of mails to send
322
-        :return: unsended mails
393
+        :return: none
323 394
         """
324 395
         logger.debug(self, 'Notify tracim about {} new responses'.format(
325 396
             len(mails),
326 397
         ))
327
-        unsended_mails = []
328 398
         # TODO BS 20171124: Look around mail.get_from_address(), mail.get_key()
329 399
         # , mail.get_body() etc ... for raise InvalidEmailError if missing
330 400
         #  required informations (actually get_from_address raise IndexError
331 401
         #  if no from address for example) and catch it here
332 402
         while mails:
333 403
             mail = mails.pop()
404
+            body = mail.get_body(
405
+                use_html_parsing=self.use_html_parsing,
406
+                use_txt_parsing=self.use_txt_parsing,
407
+            )
408
+            from_address = mail.get_from_address()
409
+
410
+            # don't create element for 'empty' mail
411
+            if not body:
412
+                logger.warning(
413
+                    self,
414
+                    'Mail from {} has not valable content'.format(
415
+                        from_address
416
+                    ),
417
+                )
418
+                continue
419
+
334 420
             msg = {'token': self.token,
335
-                   'user_mail': mail.get_from_address(),
421
+                   'user_mail': from_address,
336 422
                    'content_id': mail.get_key(),
337 423
                    'payload': {
338
-                       'content': mail.get_body(
339
-                           use_html_parsing=self.use_html_parsing,
340
-                           use_txt_parsing=self.use_txt_parsing),
424
+                       'content': body,
341 425
                    }}
342 426
             try:
343 427
                 logger.debug(
@@ -355,66 +439,14 @@ class MailFetcher(object):
355 439
                         str(r.status_code),
356 440
                         details,
357 441
                     ))
358
-                # Flag all correctly checked mail, unseen the others
442
+                # Flag all correctly checked mail
359 443
                 if r.status_code in [200, 204, 400]:
360
-                    self._set_flag(mail.uid, IMAP_CHECKED_FLAG)
361
-                else:
362
-                    self._unset_flag(mail.uid, IMAP_SEEN_FLAG)
444
+                    imapc.add_flags((mail.uid,), IMAP_CHECKED_FLAG)
445
+                    imapc.add_flags((mail.uid,), IMAP_SEEN_FLAG)
363 446
             # TODO - G.M - Verify exception correctly works
364 447
             except requests.exceptions.Timeout as e:
365 448
                 log = 'Timeout error to transmit fetched mail to tracim : {}'
366 449
                 logger.error(self, log.format(str(e)))
367
-                unsended_mails.append(mail)
368
-                self._unset_flag(mail.uid, IMAP_SEEN_FLAG)
369 450
             except requests.exceptions.RequestException as e:
370 451
                 log = 'Fail to transmit fetched mail to tracim : {}'
371 452
                 logger.error(self, log.format(str(e)))
372
-                self._unset_flag(mail.uid, IMAP_SEEN_FLAG)
373
-
374
-    def _set_flag(
375
-            self,
376
-            uid: int,
377
-            flag: str,
378
-            ) -> None:
379
-        assert uid is not None
380
-
381
-        rv, data = self._connection.store(
382
-            uid,
383
-            '+FLAGS',
384
-            flag,
385
-        )
386
-        if rv == 'OK':
387
-            log = 'Message {uid} set as {flag}.'.format(
388
-                uid=uid,
389
-                flag=flag)
390
-            logger.debug(self, log)
391
-        else:
392
-            log = 'Can not set Message {uid} as {flag} : {rv}'.format(
393
-                uid=uid,
394
-                flag=flag,
395
-                rv=rv)
396
-            logger.error(self, log)
397
-
398
-    def _unset_flag(
399
-            self,
400
-            uid: int,
401
-            flag: str,
402
-            ) -> None:
403
-        assert uid is not None
404
-
405
-        rv, data = self._connection.store(
406
-            uid,
407
-            '-FLAGS',
408
-            flag,
409
-        )
410
-        if rv == 'OK':
411
-            log = 'Message {uid} unset as {flag}.'.format(
412
-                uid=uid,
413
-                flag=flag)
414
-            logger.debug(self, log)
415
-        else:
416
-            log = 'Can not unset Message {uid} as {flag} : {rv}'.format(
417
-                uid=uid,
418
-                flag=flag,
419
-                rv=rv)
420
-            logger.error(self, log)

+ 2 - 1
tracim/tracim/lib/email_processing/models.py Bestand weergeven

@@ -109,7 +109,8 @@ class HtmlBodyMailParts(BodyMailParts):
109 109
         if len(self._list) > 0:
110 110
             txt = BeautifulSoup(value.text, 'html.parser').get_text()
111 111
             txt = txt.replace('\n', '').strip()
112
-            if not txt:
112
+            img = BeautifulSoup(value.text, 'html.parser').find('img')
113
+            if not txt and not img:
113 114
                 value.part_type = self._list[-1].part_type
114 115
         BodyMailParts._check_value(value)
115 116
         BodyMailParts._append(self, value)

+ 25 - 38
tracim/tracim/lib/email_processing/sanitizer.py Bestand weergeven

@@ -1,42 +1,19 @@
1
+import typing
1 2
 from bs4 import BeautifulSoup, Tag
2
-
3
+from tracim.lib.email_processing.sanitizer_config.attrs_whitelist import ATTRS_WHITELIST  # nopep8
4
+from tracim.lib.email_processing.sanitizer_config.class_blacklist import CLASS_BLACKLIST  # nopep8
5
+from tracim.lib.email_processing.sanitizer_config.id_blacklist import ID_BLACKLIST  # nopep8
6
+from tracim.lib.email_processing.sanitizer_config.tag_blacklist import TAG_BLACKLIST  # nopep8
7
+from tracim.lib.email_processing.sanitizer_config.tag_whitelist import TAG_WHITELIST  # nopep8
3 8
 
4 9
 class HtmlSanitizerConfig(object):
5
-    # some Default_html_tags type
6
-    HTML_Heading_tag = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
7
-    HTML_Text_parts_tag = ['p',
8
-                           'br', 'hr',
9
-                           'pre', 'code', 'samp',  # preformatted content
10
-                           'q', 'blockquote',  # quotes
11
-                           ]
12
-    HTML_Text_format_tag = ['b', 'i', 'u', 'small', 'sub', 'sup', ]
13
-    HTML_Text_semantic_tag = ['strong', 'em',
14
-                              'mark', 'cite', 'dfn',
15
-                              'del', 'ins', ]
16
-    HTML_Table_tag = ['table',
17
-                      'thead', 'tfoot', 'tbody',
18
-                      'tr', 'td', 'caption', ]
19
-
20
-    HTML_List_tag = ['ul', 'li', 'ol',  # simple list
21
-                     'dl', 'dt', 'dd', ]  # definition list
22
-
23
-    # Rules
24
-    Tag_whitelist = HTML_Heading_tag \
25
-                    + HTML_Text_parts_tag \
26
-                    + HTML_Text_format_tag \
27
-                    + HTML_Text_semantic_tag \
28
-                    + HTML_Table_tag \
29
-                    + HTML_List_tag
30
-
31
-    Tag_blacklist = ['script', 'style']
32
-
33
-    # TODO - G.M - 2017-12-01 - Think about removing class/id Blacklist
34
-    # These elements are no longer required.
35
-    Class_blacklist = []
36
-    Id_blacklist = []
37
-
38
-    Attrs_whitelist = ['href']
39
-
10
+    # whitelist : keep tag and content
11
+    Tag_whitelist = TAG_WHITELIST
12
+    Attrs_whitelist = ATTRS_WHITELIST
13
+    # blacklist : remove content
14
+    Tag_blacklist = TAG_BLACKLIST
15
+    Class_blacklist = CLASS_BLACKLIST
16
+    Id_blacklist = ID_BLACKLIST
40 17
 
41 18
 class HtmlSanitizer(object):
42 19
     """
@@ -50,7 +27,7 @@ class HtmlSanitizer(object):
50 27
     """
51 28
 
52 29
     @classmethod
53
-    def sanitize(cls, html_body: str) -> str:
30
+    def sanitize(cls, html_body: str) -> typing.Optional[str]:
54 31
         soup = BeautifulSoup(html_body, 'html.parser')
55 32
         for tag in soup.findAll():
56 33
             if cls._tag_to_extract(tag):
@@ -62,7 +39,17 @@ class HtmlSanitizer(object):
62 39
                         del tag.attrs[attr]
63 40
             else:
64 41
                 tag.unwrap()
65
-        return str(soup)
42
+
43
+        if cls._is_content_empty(soup):
44
+            return None
45
+        else:
46
+            return str(soup)
47
+
48
+    @classmethod
49
+    def _is_content_empty(cls, soup):
50
+        img = soup.find('img')
51
+        txt = soup.get_text().replace('\n', '').strip()
52
+        return (not img and not txt)
66 53
 
67 54
     @classmethod
68 55
     def _tag_to_extract(cls, tag: Tag) -> bool:

+ 1 - 0
tracim/tracim/lib/email_processing/sanitizer_config/attrs_whitelist.py Bestand weergeven

@@ -0,0 +1 @@
1
+ATTRS_WHITELIST = ['href']

+ 1 - 0
tracim/tracim/lib/email_processing/sanitizer_config/class_blacklist.py Bestand weergeven

@@ -0,0 +1 @@
1
+CLASS_BLACKLIST =  []

+ 1 - 0
tracim/tracim/lib/email_processing/sanitizer_config/id_blacklist.py Bestand weergeven

@@ -0,0 +1 @@
1
+ID_BLACKLIST = []

+ 1 - 0
tracim/tracim/lib/email_processing/sanitizer_config/tag_blacklist.py Bestand weergeven

@@ -0,0 +1 @@
1
+TAG_BLACKLIST = ['script', 'style']

+ 16 - 0
tracim/tracim/lib/email_processing/sanitizer_config/tag_whitelist.py Bestand weergeven

@@ -0,0 +1,16 @@
1
+TAG_WHITELIST = [
2
+    'b', 'blockquote', 'br',
3
+    'caption', 'cite', 'code',
4
+    'dd', 'del', 'dfn', 'dl', 'dt',
5
+    'em',
6
+    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr',
7
+    'i', 'img', 'ins',
8
+    'li',
9
+    'mark',
10
+    'ol',
11
+    'p', 'pre',
12
+    'q',
13
+    'samp', 'small', 'strong', 'sub', 'sup',
14
+    'table', 'tbody', 'td', 'tfoot', 'thead', 'tr',
15
+    'u', 'ul'
16
+]

+ 1 - 1
tracim/tracim/model/auth.py Bestand weergeven

@@ -311,7 +311,7 @@ class User(DeclarativeBase):
311 311
         difference = now_seconds - auth_token_seconds
312 312
 
313 313
         if difference > validity_seconds:
314
-            self.auth_token = uuid.uuid4()
314
+            self.auth_token = str(uuid.uuid4())
315 315
             self.auth_token_created = datetime.utcnow()
316 316
             DBSession.flush()
317 317
 

+ 2 - 0
tracim/tracim/templates/master_anonymous.mak Bestand weergeven

@@ -11,6 +11,8 @@
11 11
         <meta name="author" content="">
12 12
         <link rel="icon" href="/favicon.ico">
13 13
 
14
+        <link href="${tg.url('/assets/css/default_skin.css')}" rel="stylesheet">
15
+        <link href="${tg.url('/assets/css/bootstrap.css')}" rel="stylesheet">
14 16
         <link href="${tg.url('/assets/dist/all.css')}" rel="stylesheet">
15 17
 
16 18
         <script>

+ 2 - 0
tracim/tracim/templates/master_authenticated.mak Bestand weergeven

@@ -12,6 +12,8 @@
12 12
         <meta name="author" content="">
13 13
         <link rel="icon" href="/favicon.ico">
14 14
 
15
+        <link href="${tg.url('/assets/css/default_skin.css')}" rel="stylesheet">
16
+        <link href="${tg.url('/assets/css/bootstrap.css')}" rel="stylesheet">
15 17
         <link href="${tg.url('/assets/dist/all.css')}" rel="stylesheet">
16 18
 
17 19
         <script>