Browse Source

feat(convert): devine les logos et les récupère

wip_revisions
François Poulain 1 month ago
parent
commit
8d6260d8a9
3 changed files with 53 additions and 1 deletions
  1. +48
    -1
      drupal2spip_lal/base/convert.py
  2. +4
    -0
      drupal2spip_lal/settings/base.py
  3. +1
    -0
      requirements/base.txt

+ 48
- 1
drupal2spip_lal/base/convert.py View File

@@ -1,9 +1,12 @@
import os
import re
from datetime import datetime
from itertools import groupby

from django.utils.timezone import make_aware, now
from django.conf import settings
from django.utils.timezone import make_aware

import request
from bs4 import BeautifulSoup as bs

from drupal2spip_lal.drupal import models as drupal
@@ -50,6 +53,47 @@ def strong_to_dl(html):
return '\n'.join(r)


def fetch_and_remove_logo(article):
    """
    Promote the first image of an article's ``descriptif`` to its SPIP logo.

    The first ``<img>`` found in ``article.descriptif`` is downloaded into
    ``settings.SPIP_LOGO_DIR``, removed from the HTML, and the article is
    saved with the cleaned-up ``descriptif``.

    The article is left untouched when there is no image, when its ``src``
    is not an http(s) URL (after resolving root-relative and
    protocol-relative forms), when the URL path carries no file extension,
    or when the server does not answer with status 200.

    Exceptions raised by the HTTP call or by writing the file propagate to
    the caller.
    """
    # Function-scope import so the block does not depend on the module's
    # import list being edited.
    from urllib.parse import urlparse

    def fetch_logo(src):
        """
        SPIP handles logos in a hack-like way: a file in IMG named
        'arton{}.{}'.format(article.pk, ext)

        Return True when the logo file was written, False otherwise.
        """
        # Take the extension from the URL *path* only: splitting the whole
        # URL on '.' breaks on query strings ("logo.png?itok=abc") and on
        # extension-less URLs, where the "extension" would contain '/'.
        ext = os.path.splitext(urlparse(src).path)[1].lstrip('.')
        if not ext:
            return False
        filename = 'arton{}.{}'.format(article.pk, ext)
        path = os.path.join(settings.SPIP_LOGO_DIR, filename)

        # NOTE(review): the module imports `request`, but the API used here
        # (get(stream=True), iter_content) is the one of the `requests`
        # package -- confirm the import and the requirements entry.
        r = request.get(src, stream=True)
        try:
            # Do not store an error page as the article logo.
            if r.status_code != 200:
                return False
            with open(path, 'wb') as fd:
                for chunk in r.iter_content(chunk_size=8192):
                    fd.write(chunk)
        finally:
            r.close()
        return True

    def remove_img(img):
        """Remove *img* from the soup, and the link that only wraps it."""
        # Whitespace-only text nodes do not count as real siblings.
        siblings = [
            elem
            for elem in list(img.previous_siblings) + list(img.next_siblings)
            if str(elem).strip()
        ]
        if img.parent.name == 'a' and not siblings:
            # The link exists only for the image: drop it as well.
            img = img.parent
        img.replace_with('')

    # `descriptif` may be empty or None; the parser needs a string.
    soup = bs(article.descriptif or '', 'html.parser')
    img = soup.find('img')

    src = ((img.attrs.get('src') or '') if img else '').strip()
    if not src:
        return

    if src.startswith('//'):
        # Protocol-relative URL: it already names its own host.
        src = 'http:{}'.format(src)
    elif src.startswith('/'):
        # Root-relative URL: resolve it against the Drupal site.
        src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src)

    if not re.match(r'^https?://', src):
        return

    # Only strip the image from the text once the logo is really stored.
    if not fetch_logo(src):
        return

    remove_img(img)
    article.descriptif = soup.prettify(formatter="html5")
    article.save()


def sanitarize_html(html):
html = strong_to_dl(html)
soup = bs(html, 'html.parser')
@@ -116,6 +160,7 @@ def convert_node(node, update=False):
'en_ligne': convert_timestamp(node.user.access),
'maj': convert_timestamp(node.user.created),
}

auteur, _ = spip.Auteurs.objects.update_or_create(
login=node.user.name, defaults=user_attributes
)
@@ -123,3 +168,5 @@ def convert_node(node, update=False):
spip.AuteursLiens.objects.update_or_create(
auteur=auteur, id_objet=article.pk, objet='article'
)

fetch_and_remove_logo(article)

+ 4
- 0
drupal2spip_lal/settings/base.py View File

@@ -177,6 +177,8 @@ STATICFILES_FINDERS = [
# https://docs.djangoproject.com/en/stable/ref/settings/#media-root
MEDIA_ROOT = var_dir('media')

# Directory where fetch_and_remove_logo (base/convert.py) writes the
# 'arton<pk>.<ext>' logo files; MEDIA_ROOT is passed as the default.
SPIP_LOGO_DIR = env('SPIP_LOGO_DIR', default=MEDIA_ROOT)

# https://docs.djangoproject.com/en/stable/ref/settings/#media-url
MEDIA_URL = os.path.join(APP_LOCATION, 'media/')

@@ -249,3 +251,5 @@ CSRF_COOKIE_PATH = APP_LOCATION
# ------------------------------------------------------------------------------
# APPLICATION AND 3RD PARTY LIBRARY SETTINGS
# ------------------------------------------------------------------------------

# Host name of the Drupal site, used by base/convert.py to turn
# root-relative image URLs ("/sites/...") into absolute http:// URLs.
DRUPAL_FQDN = env('DRUPAL_FQDN', default='www.april.org')

+ 1
- 0
requirements/base.txt View File

@@ -9,4 +9,5 @@ phpserialize

# HTML
# ------------------------------------------------------------------------------
request
beautifulsoup4

Loading…
Cancel
Save