feat(convert): devine les logos et les récupère
This commit is contained in:
parent
969e9bb155
commit
8d6260d8a9
@ -1,9 +1,12 @@
|
|||||||
|
import os
|
||||||
import re
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from itertools import groupby
|
from itertools import groupby
|
||||||
|
|
||||||
from django.utils.timezone import make_aware, now
|
from django.conf import settings
|
||||||
|
from django.utils.timezone import make_aware
|
||||||
|
|
||||||
|
import request
|
||||||
from bs4 import BeautifulSoup as bs
|
from bs4 import BeautifulSoup as bs
|
||||||
|
|
||||||
from drupal2spip_lal.drupal import models as drupal
|
from drupal2spip_lal.drupal import models as drupal
|
||||||
@ -50,6 +53,47 @@ def strong_to_dl(html):
|
|||||||
return '\n'.join(r)
|
return '\n'.join(r)
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_and_remove_logo(article):
    """
    Guess the article's logo from its description, download it, strip it.

    The first ``<img>`` found in ``article.descriptif`` is treated as the
    logo. Its ``src`` is made absolute (root-relative paths are resolved
    against ``settings.DRUPAL_FQDN``), the file is downloaded into
    ``settings.SPIP_LOGO_DIR``, the tag is removed from the description and
    the article is saved.

    The article is left untouched when the description is empty, when it
    holds no image with a usable http(s) URL, or when the download does not
    succeed. Network errors raised by the HTTP client still propagate.
    """

    def logo_extension(src):
        """Return the extension of the URL's path component, without dot."""
        # Local stdlib import so the block does not depend on a file-level
        # import being added.
        from urllib.parse import urlparse

        # Only the path is inspected, so a query string or fragment
        # (e.g. 'logo.png?itok=abc') never leaks into the extension.
        return os.path.splitext(urlparse(src).path)[1].lstrip('.')

    def fetch_logo(src):
        """
        Download ``src`` as the article's logo; return True on success.

        SPIP handles logos in a hack-like way: a file in IMG named
        'arton{}.{}'.format(article.pk, ext)
        """
        ext = logo_extension(src)
        if not ext:
            # Without an extension the 'arton<id>.<ext>' name can't be built.
            return False
        filename = 'arton{}.{}'.format(article.pk, ext)
        path = os.path.join(settings.SPIP_LOGO_DIR, filename)

        # NOTE(review): the file imports `request`, but get(stream=True) and
        # iter_content() look like the `requests` package API -- confirm the
        # import and the requirements entry.
        # The timeout keeps a stalled server from blocking the conversion.
        r = request.get(src, stream=True, timeout=30)
        try:
            if not r.ok:
                # Do not store an HTTP error page as the logo.
                return False
            with open(path, 'wb') as fd:
                for chunk in r.iter_content(chunk_size=8192):
                    fd.write(chunk)
        finally:
            # Release the streamed connection even if writing fails.
            r.close()
        return True

    def remove_img(img):
        """Remove ``img``, or its parent link when it only wraps the image."""
        # Bare newlines around the image do not count as real siblings.
        siblings = [
            elem
            for elem in list(img.previous_siblings) + list(img.next_siblings)
            if elem != '\n'
        ]
        if img.parent.name == 'a' and not siblings:
            img = img.parent
        img.replace_with('')

    if not article.descriptif:
        return

    soup = bs(article.descriptif, 'html.parser')
    img = soup.find('img')
    src = img.attrs.get('src') if img is not None else None
    if not src:
        return

    if src.startswith('//'):
        # Protocol-relative URL: it already names its host, only the scheme
        # is missing.
        src = 'http:{}'.format(src)
    elif src.startswith('/'):
        src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src)

    if not re.match(r'^https?://', src):
        return

    # Only drop the image from the description once the logo is stored.
    if fetch_logo(src):
        remove_img(img)
        article.descriptif = soup.prettify(formatter="html5")
        article.save()
|
||||||
|
|
||||||
|
|
||||||
def sanitarize_html(html):
|
def sanitarize_html(html):
|
||||||
html = strong_to_dl(html)
|
html = strong_to_dl(html)
|
||||||
soup = bs(html, 'html.parser')
|
soup = bs(html, 'html.parser')
|
||||||
@ -116,6 +160,7 @@ def convert_node(node, update=False):
|
|||||||
'en_ligne': convert_timestamp(node.user.access),
|
'en_ligne': convert_timestamp(node.user.access),
|
||||||
'maj': convert_timestamp(node.user.created),
|
'maj': convert_timestamp(node.user.created),
|
||||||
}
|
}
|
||||||
|
|
||||||
auteur, _ = spip.Auteurs.objects.update_or_create(
|
auteur, _ = spip.Auteurs.objects.update_or_create(
|
||||||
login=node.user.name, defaults=user_attributes
|
login=node.user.name, defaults=user_attributes
|
||||||
)
|
)
|
||||||
@ -123,3 +168,5 @@ def convert_node(node, update=False):
|
|||||||
spip.AuteursLiens.objects.update_or_create(
|
spip.AuteursLiens.objects.update_or_create(
|
||||||
auteur=auteur, id_objet=article.pk, objet='article'
|
auteur=auteur, id_objet=article.pk, objet='article'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
fetch_and_remove_logo(article)
|
||||||
|
@ -177,6 +177,8 @@ STATICFILES_FINDERS = [
|
|||||||
# https://docs.djangoproject.com/en/stable/ref/settings/#media-root
|
# https://docs.djangoproject.com/en/stable/ref/settings/#media-root
|
||||||
MEDIA_ROOT = var_dir('media')
|
MEDIA_ROOT = var_dir('media')
|
||||||
|
|
||||||
|
# Directory where downloaded article logos are written; read through env()
# and falling back to MEDIA_ROOT when SPIP_LOGO_DIR is not provided.
SPIP_LOGO_DIR = env('SPIP_LOGO_DIR', default=MEDIA_ROOT)
|
||||||
|
|
||||||
# https://docs.djangoproject.com/en/stable/ref/settings/#media-url
|
# https://docs.djangoproject.com/en/stable/ref/settings/#media-url
|
||||||
MEDIA_URL = os.path.join(APP_LOCATION, 'media/')
|
MEDIA_URL = os.path.join(APP_LOCATION, 'media/')
|
||||||
|
|
||||||
@ -249,3 +251,5 @@ CSRF_COOKIE_PATH = APP_LOCATION
|
|||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
# APPLICATION AND 3RD PARTY LIBRARY SETTINGS
|
# APPLICATION AND 3RD PARTY LIBRARY SETTINGS
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Host name prepended to root-relative image URLs found in converted
# articles; read through env(), defaulting to 'www.april.org'.
DRUPAL_FQDN = env('DRUPAL_FQDN', default='www.april.org')
|
||||||
|
@ -9,4 +9,5 @@ phpserialize
|
|||||||
|
|
||||||
# HTML
|
# HTML
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
request
|
||||||
beautifulsoup4
|
beautifulsoup4
|
||||||
|
Loading…
Reference in New Issue
Block a user