feat(convert): ajoute les documents liés

2020-08-09 15:58:14 +02:00 · 2020-08-09 15:58:14 +02:00 · 734deddfc5
commit 734deddfc5
parent 9360cc134e
2 changed files with 95 additions and 11 deletions
--- a/drupal2spip_lal/base/convert.py
+++ b/drupal2spip_lal/base/convert.py
@@ -6,6 +6,7 @@ from itertools import groupby
 from subprocess import PIPE, Popen

 from django.conf import settings
+from django.db.models import F
 from django.utils.timezone import make_aware, now

 import request
@@ -52,6 +53,26 @@ def strong_to_dl(html):
    return '\n'.join(r)


+def download(src, filename, force_download):
+    """
+    Fetch `src` over HTTP and store it as `filename` under SPIP_LOGO_DIR.
+
+    A `src` starting with '/' is treated as a path on the Drupal site and
+    is prefixed with 'http://<settings.DRUPAL_FQDN>'. The request is only
+    made when the target file is not readable yet or when `force_download`
+    is true. `filename` may contain a sub-directory (e.g. 'pdf/doc.pdf');
+    missing directories are created. When the server answers with an HTTP
+    error status, a warning is logged and nothing is written.
+
+    Returns None.
+    """
+    if src and src.startswith('/'):
+        src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src)
+
+    path = os.path.join(settings.SPIP_LOGO_DIR, filename)
+
+    if os.access(path, os.R_OK) and not force_download:
+        return
+
+    r = request.get(src, stream=True)
+    if not r.ok:
+        # Never store an HTTP error page as if it were the requested file:
+        # it would be considered already downloaded on the next run.
+        logger.warning(
+            'Download of {} failed with HTTP status {}'.format(
+                src, r.status_code
+            )
+        )
+        r.close()
+        return
+
+    # The target may live in a sub-directory that does not exist yet;
+    # open() would fail with FileNotFoundError without this.
+    directory = os.path.dirname(path)
+    if directory:
+        os.makedirs(directory, exist_ok=True)
+
+    with open(path, 'wb') as fd:
+        for chunk in r.iter_content(chunk_size=8192):
+            fd.write(chunk)
+
+
+def fetch_document(src, filename, force_download):
+    """
+    Download a document into a sub-directory named after its extension.
+
+    The extension is the text after the last dot of `filename`; names
+    without a dot, or ending with a dot, use 'unknown'. The file is
+    fetched through download() with the target 'extension/filename'.
+
+    Returns the tuple (extension, cible), `cible` being that target path.
+    """
+    _, dot, suffix = filename.rpartition('.')
+    # split('.')[-1] returned the whole name for dotless files, so the
+    # 'unknown' fallback never applied to them.
+    extension = suffix if dot and suffix else 'unknown'
+    cible = os.path.join(extension, filename)
+    download(src, cible, force_download)
+    return extension, cible
+
+
 def fetch_and_remove_logo(article, force_download):
    def fetch_logo(src):
        """
@@ -60,13 +81,7 @@ def fetch_and_remove_logo(article, force_download):
        """
        ext = src.split('.')[-1]
        filename = 'arton{}.{}'.format(article.pk, ext)
-        path = os.path.join(settings.SPIP_LOGO_DIR, filename)
-
-        if not os.access(path, os.R_OK) or force_download:
-            r = request.get(src, stream=True)
-            with open(path, 'wb') as fd:
-                for chunk in r.iter_content(chunk_size=128):
-                    fd.write(chunk)
+        download(src, filename, force_download)

    def remove_img(img):
        has_siblings = [
@@ -84,10 +99,7 @@ def fetch_and_remove_logo(article, force_download):

    src = img and img.attrs.get('src', None)

-    if src and src.startswith('/'):
-        src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src)
-
-    if src and re.match(r'^(https?)?://', src):
+    if src and re.match(r'^(https?)?://|^/', src):
        fetch_logo(src)
        remove_img(img)
        article.descriptif = str(soup)
@@ -101,6 +113,8 @@ def fetch_and_remove_logo(article, force_download):
        article.texte = str(soup)

        article.save()
+    else:
+        logger.warn('Article {} has ignored logo: {}'.format(article.pk, src))


 def filter_html(html):
@@ -331,3 +345,56 @@ def convert_node(node, options):
            spip.MotsLiens.objects.get_or_create(
                mot=mot, id_objet=article.pk, objet='article'
            )
+
+        images_mimes = [
+            'image/png',
+            'image/jpeg',
+            'image/svg+xml',
+            'image/gif',
+        ]
+        audio_mimes = ['application/ogg', 'audio/x-wav', 'audio/mpeg']
+        video_mimes = ['video/mp4']
+
+        for upload in node.upload_set.filter(
+            revision=F('node__published_revision')
+        ):
+            is_audio = upload.file.filemime in audio_mimes
+            is_image = upload.file.filemime in images_mimes
+            is_video = upload.file.filemime in video_mimes
+
+            extension, fichier = fetch_document(
+                upload.file.filepath, upload.file.filename, force_download
+            )
+
+            document_attributes = {
+                'media': 'file',
+                'extension': extension,
+                'fichier': fichier,
+                'titre': upload.description,
+                'date': convert_timestamp(upload.file.timestamp),
+                'taille': upload.file.filesize,
+                'date_publication': convert_timestamp(upload.file.timestamp),
+                'mode': 'image' if is_image else 'document',
+            }
+            if is_image:
+                document_attributes['media'] = 'image'
+                document_attributes['hauteur'] = 1
+                document_attributes['largeur'] = 1
+            elif is_audio:
+                document_attributes['media'] = 'audio'
+                document_attributes['duree'] = 1
+            elif is_video:
+                document_attributes['media'] = 'video'
+                document_attributes['duree'] = 1
+                document_attributes['hauteur'] = 1
+                document_attributes['largeur'] = 1
+
+            document, _ = spip.Documents.objects.get_or_create(
+                **document_attributes, defaults={'maj': now},
+            )
+            spip.DocumentsLiens.objects.get_or_create(
+                document=document,
+                id_objet=article.pk,
+                objet='article',
+                rang_lien=upload.weight,
+            )
--- a/drupal2spip_lal/base/management/commands/inspectdb.py
+++ b/drupal2spip_lal/base/management/commands/inspectdb.py
@@ -91,6 +91,9 @@ DB_RELATIONS = {
        'auteurs_liens': {
            'id_auteur': ('id_auteur', 'auteurs', 'auteur'),
        },
+        'documents_liens': {
+            'id_document': ('id_document', 'documents', 'document'),
+        },
        'mots': {
            'id_groupe': ('id_groupe', 'groupes_mots', 'groupe'),
        },
@@ -154,6 +157,20 @@ DB_PARAMS = {
        'auteurs_liens': {
            'vu': {'default': 'non'},
        },
+        'documents': {
+            'id_vignette': {'default': 0},
+            'brise': {'default': 0},
+            'credits': {'default': ''},
+            'distant': {'default': 'non'},
+            'duree': {'default': 0},
+            'hauteur': {'default': 0},
+            'largeur': {'default': 0},
+            'statut': {'default': 'prop'},
+        },
+        'documents_liens': {
+            'vu': {'default': 'non'},
+            'rang_lien': {'default': 0},
+        },
        'groupes_mots': {
            'unseul': {'default': 'non'},
            'obligatoire': {'default': 'non'},