From 1456a62b2596f66b80caf095393a9300e7518998 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Poulain?=
Date: Mon, 3 Aug 2020 18:18:12 +0200
Subject: [PATCH] feat(convert): sanitarisation html avec bs

---
Review notes (this section is ignored by `git am`):
- sanitarize_html() now serializes with decode() instead of prettify().
  prettify() inserts newlines and indentation, which changes the meaning
  of whitespace-sensitive markup; decode() keeps the parsed content as is
  while still applying the "html5" output formatter.
- The post-image blob id on the convert.py `index` line predates this
  revision of the hunk and no longer matches the resulting file.

 drupal2spip_lal/base/convert.py | 11 +++++++++--
 requirements/base.txt           |  4 ++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drupal2spip_lal/base/convert.py b/drupal2spip_lal/base/convert.py
index 9d90f98..c28a866 100644
--- a/drupal2spip_lal/base/convert.py
+++ b/drupal2spip_lal/base/convert.py
@@ -2,6 +2,8 @@ from datetime import datetime
 
 from django.utils.timezone import make_aware, now
 
+from bs4 import BeautifulSoup as bs
+
 from drupal2spip_lal.drupal import models as drupal
 from drupal2spip_lal.spip import models as spip
 
@@ -16,8 +18,13 @@ def convert_timestamp(timestamp):
 
 
 def sanitarize_html(html):
-    # FIXME: bs4
-    return html
+    """Parse *html* with BeautifulSoup and return it re-serialized.
+
+    ``decode()`` is used rather than ``prettify()``: prettify inserts
+    newlines and indentation, which alters whitespace-sensitive markup.
+    """
+    soup = bs(html, 'html.parser')
+    return soup.decode(formatter="html5")
 
 
 def convert_node(node, update=False):
diff --git a/requirements/base.txt b/requirements/base.txt
index 0444250..4f842d5 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -6,3 +6,7 @@ django-environ ==0.4.5 # https://github.com/joke2k/django-environ
 # PHP
 # ------------------------------------------------------------------------------
 phpserialize
+
+# HTML
+# ------------------------------------------------------------------------------
+beautifulsoup4