feat(convert): devine les logos et les récupère
This commit is contained in:
parent
969e9bb155
commit
8d6260d8a9
@ -1,9 +1,12 @@
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from itertools import groupby
|
||||
|
||||
from django.utils.timezone import make_aware, now
|
||||
from django.conf import settings
|
||||
from django.utils.timezone import make_aware
|
||||
|
||||
import request
|
||||
from bs4 import BeautifulSoup as bs
|
||||
|
||||
from drupal2spip_lal.drupal import models as drupal
|
||||
@ -50,6 +53,47 @@ def strong_to_dl(html):
|
||||
return '\n'.join(r)
|
||||
|
||||
|
||||
def fetch_and_remove_logo(article):
    """
    Promote the first image of an article's description to its SPIP logo.

    The first <img> found in ``article.descriptif`` is downloaded into
    ``settings.SPIP_LOGO_DIR``, then removed from the description, and the
    article is saved. The article is left untouched when the description
    holds no usable image URL or when the download does not succeed.

    Network errors raised by the HTTP client are not caught here.
    """
    # Function-scope import: only this helper needs it.
    from urllib.parse import urlsplit

    def fetch_logo(src):
        """
        Download ``src`` as the article logo; return True on success.

        SPIP handles logos in a hack-like way: a file in IMG named
        'arton{}.{}'.format(article.pk, ext)
        """
        # Take the extension from the URL path only, so that a query string,
        # a fragment or a dot in the host name never leaks into the file name.
        ext = os.path.splitext(urlsplit(src).path)[1].lstrip('.')
        if not ext:
            return False

        filename = 'arton{}.{}'.format(article.pk, ext)
        path = os.path.join(settings.SPIP_LOGO_DIR, filename)

        # NOTE(review): get(stream=True) / iter_content() is the API of the
        # "requests" library -- confirm the module imported as `request`.
        r = request.get(src, stream=True)
        try:
            # Never store an error page as the logo.
            if r.status_code != 200:
                return False
            with open(path, 'wb') as fd:
                for chunk in r.iter_content(chunk_size=8192):
                    fd.write(chunk)
        finally:
            # A streamed response keeps its connection until it is closed.
            r.close()
        return True

    def remove_img(img):
        """
        Remove ``img`` from the tree.

        When the image is the only meaningful child of an <a> element, the
        whole link is removed so that no empty anchor is left behind.
        """
        has_siblings = any(
            elem != '\n'
            for siblings in (img.previous_siblings, img.next_siblings)
            for elem in siblings
        )
        if img.parent.name == 'a' and not has_siblings:
            img = img.parent
        img.replace_with('')

    # An empty/None description simply yields no image.
    soup = bs(article.descriptif or '', 'html.parser')
    img = soup.find('img')
    src = img.attrs.get('src') if img is not None else None
    if not src:
        return

    if src.startswith('//'):
        # Protocol-relative URL: it already names its own host.
        src = 'http:{}'.format(src)
    elif src.startswith('/'):
        # Root-relative URL: resolve it against the Drupal site.
        src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src)

    # Only absolute http(s) URLs can be fetched (skips data:, relative paths).
    if not re.match(r'^https?://', src):
        return
    if not fetch_logo(src):
        return

    remove_img(img)
    article.descriptif = soup.prettify(formatter="html5")
    article.save()
|
||||
|
||||
|
||||
def sanitarize_html(html):
|
||||
html = strong_to_dl(html)
|
||||
soup = bs(html, 'html.parser')
|
||||
@ -116,6 +160,7 @@ def convert_node(node, update=False):
|
||||
'en_ligne': convert_timestamp(node.user.access),
|
||||
'maj': convert_timestamp(node.user.created),
|
||||
}
|
||||
|
||||
auteur, _ = spip.Auteurs.objects.update_or_create(
|
||||
login=node.user.name, defaults=user_attributes
|
||||
)
|
||||
@ -123,3 +168,5 @@ def convert_node(node, update=False):
|
||||
spip.AuteursLiens.objects.update_or_create(
|
||||
auteur=auteur, id_objet=article.pk, objet='article'
|
||||
)
|
||||
|
||||
fetch_and_remove_logo(article)
|
||||
|
@ -177,6 +177,8 @@ STATICFILES_FINDERS = [
|
||||
# https://docs.djangoproject.com/en/stable/ref/settings/#media-root
|
||||
MEDIA_ROOT = var_dir('media')
|
||||
|
||||
# Directory where article logos fetched during conversion are written
# (files named 'arton<article pk>.<ext>'); defaults to MEDIA_ROOT.
SPIP_LOGO_DIR = env('SPIP_LOGO_DIR', default=MEDIA_ROOT)
|
||||
|
||||
# https://docs.djangoproject.com/en/stable/ref/settings/#media-url
|
||||
MEDIA_URL = os.path.join(APP_LOCATION, 'media/')
|
||||
|
||||
@ -249,3 +251,5 @@ CSRF_COOKIE_PATH = APP_LOCATION
|
||||
# ------------------------------------------------------------------------------
|
||||
# APPLICATION AND 3RD PARTY LIBRARY SETTINGS
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
# Host name used to turn root-relative image URLs into absolute ones
# (http://<DRUPAL_FQDN><path>) before the image is fetched.
DRUPAL_FQDN = env('DRUPAL_FQDN', default='www.april.org')
|
||||
|
@ -9,4 +9,5 @@ phpserialize
|
||||
|
||||
# HTML
|
||||
# ------------------------------------------------------------------------------
|
||||
request
|
||||
beautifulsoup4
|
||||
|
Loading…
Reference in New Issue
Block a user