From 734deddfc5966322466517b5fd88258c6a52ec67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Poulain?= Date: Sun, 9 Aug 2020 15:58:14 +0200 Subject: [PATCH] =?UTF-8?q?feat(convert):=20ajoute=20les=20documents=20li?= =?UTF-8?q?=C3=A9s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- drupal2spip_lal/base/convert.py | 89 ++++++++++++++++--- .../base/management/commands/inspectdb.py | 17 ++++ 2 files changed, 95 insertions(+), 11 deletions(-) diff --git a/drupal2spip_lal/base/convert.py b/drupal2spip_lal/base/convert.py index cdcb28f..7c43bc3 100644 --- a/drupal2spip_lal/base/convert.py +++ b/drupal2spip_lal/base/convert.py @@ -6,6 +6,7 @@ from itertools import groupby from subprocess import PIPE, Popen from django.conf import settings +from django.db.models import F from django.utils.timezone import make_aware, now import request @@ -52,6 +53,26 @@ def strong_to_dl(html): return '\n'.join(r) +def download(src, filename, force_download): + if src and src.startswith('/'): + src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src) + + path = os.path.join(settings.SPIP_LOGO_DIR, filename) + + if not os.access(path, os.R_OK) or force_download: + r = request.get(src, stream=True) + with open(path, 'wb') as fd: + for chunk in r.iter_content(chunk_size=128): + fd.write(chunk) + + +def fetch_document(src, filename, force_download): + extension = filename.split('.')[-1] or 'unknown' + cible = os.path.join(extension, filename) + download(src, cible, force_download) + return extension, cible + + def fetch_and_remove_logo(article, force_download): def fetch_logo(src): """ @@ -60,13 +81,7 @@ def fetch_and_remove_logo(article, force_download): """ ext = src.split('.')[-1] filename = 'arton{}.{}'.format(article.pk, ext) - path = os.path.join(settings.SPIP_LOGO_DIR, filename) - - if not os.access(path, os.R_OK) or force_download: - r = request.get(src, stream=True) - with open(path, 'wb') as fd: - for chunk in r.iter_content(chunk_size=128): - fd.write(chunk) + download(src, filename, force_download) def remove_img(img): has_siblings = [ @@ -84,10 +99,7 @@ def fetch_and_remove_logo(article, force_download): src = img and img.attrs.get('src', None) - if src and src.startswith('/'): - src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src) - - if src and re.match(r'^(https?)?://', src): + if src and re.match(r'^(https?)?://|^/', src): fetch_logo(src) remove_img(img) article.descriptif = str(soup) @@ -101,6 +113,8 @@ def fetch_and_remove_logo(article, force_download): article.texte = str(soup) article.save() + else: + logger.warn('Article {} has ignored logo: {}'.format(article.pk, src)) def filter_html(html): @@ -331,3 +345,56 @@ def convert_node(node, options): spip.MotsLiens.objects.get_or_create( mot=mot, id_objet=article.pk, objet='article' ) + + images_mimes = [ + 'image/png', + 'image/jpeg', + 'image/svg+xml', + 'image/gif', + ] + audio_mimes = ['application/ogg', 'audio/x-wav', 'audio/mpeg'] + video_mimes = ['video/mp4'] + + for upload in node.upload_set.filter( + revision=F('node__published_revision') + ): + is_audio = upload.file.filemime in audio_mimes + is_image = upload.file.filemime in images_mimes + is_video = upload.file.filemime in video_mimes + + extension, fichier = fetch_document( + upload.file.filepath, upload.file.filename, force_download + ) + + document_attributes = { + 'media': 'file', + 'extension': extension, + 'fichier': fichier, + 'titre': upload.description, + 'date': convert_timestamp(upload.file.timestamp), + 'taille': upload.file.filesize, + 'date_publication': convert_timestamp(upload.file.timestamp), + 'mode': 'image' if is_image else 'document', + } + if is_image: + document_attributes['media'] = 'image' + document_attributes['hauteur'] = 1 + document_attributes['largeur'] = 1 + elif is_audio: + document_attributes['media'] = 'audio' + document_attributes['duree'] = 1 + elif is_video: + document_attributes['media'] = 'video' + document_attributes['duree'] = 1 + document_attributes['hauteur'] = 1 + document_attributes['largeur'] = 1 + + document, _ = spip.Documents.objects.get_or_create( + **document_attributes, defaults={'maj': now}, + ) + spip.DocumentsLiens.objects.get_or_create( + document=document, + id_objet=article.pk, + objet='article', + rang_lien=upload.weight, + ) diff --git a/drupal2spip_lal/base/management/commands/inspectdb.py b/drupal2spip_lal/base/management/commands/inspectdb.py index b646f91..12f0a62 100644 --- a/drupal2spip_lal/base/management/commands/inspectdb.py +++ b/drupal2spip_lal/base/management/commands/inspectdb.py @@ -91,6 +91,9 @@ DB_RELATIONS = { 'auteurs_liens': { 'id_auteur': ('id_auteur', 'auteurs', 'auteur'), }, + 'documents_liens': { + 'id_document': ('id_document', 'documents', 'document'), + }, 'mots': { 'id_groupe': ('id_groupe', 'groupes_mots', 'groupe'), }, @@ -154,6 +157,20 @@ DB_PARAMS = { 'auteurs_liens': { 'vu': {'default': 'non'}, }, + 'documents': { + 'id_vignette': {'default': 0}, + 'brise': {'default': 0}, + 'credits': {'default': ''}, + 'distant': {'default': 'non'}, + 'duree': {'default': 0}, + 'hauteur': {'default': 0}, + 'largeur': {'default': 0}, + 'statut': {'default': 'prop'}, + }, + 'documents_liens': { + 'vu': {'default': 'non'}, + 'rang_lien': {'default': 0}, + }, 'groupes_mots': { 'unseul': {'default': 'non'}, 'obligatoire': {'default': 'non'},