From 71300113feade013c5a22264483c02b69c62be61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Poulain?= Date: Tue, 4 Aug 2020 11:22:19 +0200 Subject: [PATCH] =?UTF-8?q?feat(convert):=20am=C3=A9lioration=20des=20nett?= =?UTF-8?q?oyages=20de=20logo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- drupal2spip_lal/base/convert.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drupal2spip_lal/base/convert.py b/drupal2spip_lal/base/convert.py index d3e6e8a..e02d420 100644 --- a/drupal2spip_lal/base/convert.py +++ b/drupal2spip_lal/base/convert.py @@ -75,9 +75,10 @@ def fetch_and_remove_logo(article, force_download): for elem in list(img.previous_siblings) + list(img.next_siblings) if elem != '\n' ] - if img.parent.name == 'a' and not has_siblings: - img = img.parent - img.replace_with('') + if img.parent.name in ['a', 'p'] and not has_siblings: + img.parent.replace_with('') + else: + img.replace_with('') soup = bs(article.descriptif, 'html.parser') img = soup.find('img') @@ -91,6 +92,15 @@ def fetch_and_remove_logo(article, force_download): fetch_logo(src) remove_img(img) article.descriptif = str(soup) + + # L'image est généralement reprise dans le corps avec un format + # différent (par ex sans lien vers l'article). + soup = bs(article.texte, 'html.parser') + img = soup.find('img', src=src) + if img: + remove_img(img) + article.texte = str(soup) + article.save()