feat(convert): amélioration des nettoyages de logo

This commit is contained in:
François Poulain 2020-08-04 11:22:19 +02:00
parent 3dc21e7b94
commit 71300113fe

View File

@@ -75,9 +75,10 @@ def fetch_and_remove_logo(article, force_download):
for elem in list(img.previous_siblings) + list(img.next_siblings) for elem in list(img.previous_siblings) + list(img.next_siblings)
if elem != '\n' if elem != '\n'
] ]
if img.parent.name == 'a' and not has_siblings: if img.parent.name in ['a', 'p'] and not has_siblings:
img = img.parent img.parent.replace_with('')
img.replace_with('') else:
img.replace_with('')
soup = bs(article.descriptif, 'html.parser') soup = bs(article.descriptif, 'html.parser')
img = soup.find('img') img = soup.find('img')
@@ -91,6 +92,15 @@ def fetch_and_remove_logo(article, force_download):
fetch_logo(src) fetch_logo(src)
remove_img(img) remove_img(img)
article.descriptif = str(soup) article.descriptif = str(soup)
# L'image est généralement reprise dans le corps avec un format
# différent (par ex sans lien vers l'article).
soup = bs(article.texte, 'html.parser')
img = soup.find('img', src=src)
if img:
remove_img(img)
article.texte = str(soup)
article.save() article.save()