From 734deddfc5966322466517b5fd88258c6a52ec67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Poulain?= <fpoulain@metrodore.fr>
Date: Sun, 9 Aug 2020 15:58:14 +0200
Subject: [PATCH] =?UTF-8?q?feat(convert):=20ajoute=20les=20documents=20li?=
 =?UTF-8?q?=C3=A9s?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 drupal2spip_lal/base/convert.py               | 89 ++++++++++++++++---
 .../base/management/commands/inspectdb.py     | 17 ++++
 2 files changed, 95 insertions(+), 11 deletions(-)

diff --git a/drupal2spip_lal/base/convert.py b/drupal2spip_lal/base/convert.py
index cdcb28f..7c43bc3 100644
--- a/drupal2spip_lal/base/convert.py
+++ b/drupal2spip_lal/base/convert.py
@@ -6,6 +6,7 @@ from itertools import groupby
 from subprocess import PIPE, Popen
 
 from django.conf import settings
+from django.db.models import F
 from django.utils.timezone import make_aware, now
 
 import request
@@ -52,6 +53,26 @@ def strong_to_dl(html):
     return '\n'.join(r)
 
 
+def download(src, filename, force_download):
+    """Fetch src into SPIP_LOGO_DIR/filename if missing or force_download."""
+    if src and src.startswith('/'):
+        src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src)
+    path = os.path.join(settings.SPIP_LOGO_DIR, filename)
+    if force_download or not os.access(path, os.R_OK):
+        r = request.get(src, stream=True)
+        # filename may carry a sub-directory (see fetch_document): create it.
+        os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
+        with open(path, 'wb') as fd:
+            fd.writelines(r.iter_content(chunk_size=128))
+
+
+def fetch_document(src, filename, force_download):
+    """Download src to '<ext>/<filename>'; return (ext, relative path)."""
+    ext = os.path.splitext(filename)[1][1:] or 'unknown'  # no dot: unknown
+    download(src, os.path.join(ext, filename), force_download)
+    return ext, os.path.join(ext, filename)
+
+
 def fetch_and_remove_logo(article, force_download):
     def fetch_logo(src):
         """
@@ -60,13 +81,7 @@ def fetch_and_remove_logo(article, force_download):
         """
         ext = src.split('.')[-1]
         filename = 'arton{}.{}'.format(article.pk, ext)
-        path = os.path.join(settings.SPIP_LOGO_DIR, filename)
-
-        if not os.access(path, os.R_OK) or force_download:
-            r = request.get(src, stream=True)
-            with open(path, 'wb') as fd:
-                for chunk in r.iter_content(chunk_size=128):
-                    fd.write(chunk)
+        download(src, filename, force_download)
 
     def remove_img(img):
         has_siblings = [
@@ -84,10 +99,7 @@ def fetch_and_remove_logo(article, force_download):
 
     src = img and img.attrs.get('src', None)
 
-    if src and src.startswith('/'):
-        src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src)
-
-    if src and re.match(r'^(https?)?://', src):
+    if src and re.match(r'^(https?)?://|^/', src):
         fetch_logo(src)
         remove_img(img)
         article.descriptif = str(soup)
@@ -101,6 +113,8 @@ def fetch_and_remove_logo(article, force_download):
         article.texte = str(soup)
 
         article.save()
+    else:
+        logger.warning('Article %s has ignored logo: %s', article.pk, src)
 
 
 def filter_html(html):
@@ -331,3 +345,56 @@ def convert_node(node, options):
             spip.MotsLiens.objects.get_or_create(
                 mot=mot, id_objet=article.pk, objet='article'
             )
+
+        images_mimes = [
+            'image/png',
+            'image/jpeg',
+            'image/svg+xml',
+            'image/gif',
+        ]
+        audio_mimes = ['application/ogg', 'audio/x-wav', 'audio/mpeg']
+        video_mimes = ['video/mp4']
+
+        for upload in node.upload_set.filter(
+            revision=F('node__published_revision')
+        ):
+            is_audio = upload.file.filemime in audio_mimes
+            is_image = upload.file.filemime in images_mimes
+            is_video = upload.file.filemime in video_mimes
+
+            extension, fichier = fetch_document(
+                upload.file.filepath, upload.file.filename, force_download
+            )
+
+            document_attributes = {
+                'media': 'file',
+                'extension': extension,
+                'fichier': fichier,
+                'titre': upload.description,
+                'date': convert_timestamp(upload.file.timestamp),
+                'taille': upload.file.filesize,
+                'date_publication': convert_timestamp(upload.file.timestamp),
+                'mode': 'image' if is_image else 'document',
+            }
+            if is_image:
+                document_attributes['media'] = 'image'
+                document_attributes['hauteur'] = 1
+                document_attributes['largeur'] = 1
+            elif is_audio:
+                document_attributes['media'] = 'audio'
+                document_attributes['duree'] = 1
+            elif is_video:
+                document_attributes['media'] = 'video'
+                document_attributes['duree'] = 1
+                document_attributes['hauteur'] = 1
+                document_attributes['largeur'] = 1
+
+            document, _ = spip.Documents.objects.get_or_create(
+                **document_attributes, defaults={'maj': now},
+            )
+            spip.DocumentsLiens.objects.get_or_create(
+                document=document,
+                id_objet=article.pk,
+                objet='article',
+                rang_lien=upload.weight,
+            )
diff --git a/drupal2spip_lal/base/management/commands/inspectdb.py b/drupal2spip_lal/base/management/commands/inspectdb.py
index b646f91..12f0a62 100644
--- a/drupal2spip_lal/base/management/commands/inspectdb.py
+++ b/drupal2spip_lal/base/management/commands/inspectdb.py
@@ -91,6 +91,9 @@ DB_RELATIONS = {
         'auteurs_liens': {
             'id_auteur': ('id_auteur', 'auteurs', 'auteur'),
         },
+        'documents_liens': {
+            'id_document': ('id_document', 'documents', 'document'),
+        },
         'mots': {
             'id_groupe': ('id_groupe', 'groupes_mots', 'groupe'),
         },
@@ -154,6 +157,20 @@ DB_PARAMS = {
         'auteurs_liens': {
             'vu': {'default': 'non'},
         },
+        'documents': {
+            'id_vignette': {'default': 0},
+            'brise': {'default': 0},
+            'credits': {'default': ''},
+            'distant': {'default': 'non'},
+            'duree': {'default': 0},
+            'hauteur': {'default': 0},
+            'largeur': {'default': 0},
+            'statut': {'default': 'prop'},
+        },
+        'documents_liens': {
+            'vu': {'default': 'non'},
+            'rang_lien': {'default': 0},
+        },
         'groupes_mots': {
             'unseul': {'default': 'non'},
             'obligatoire': {'default': 'non'},