feat(convert): ajoute la fouille des fichiers image et video

This commit is contained in:
François Poulain 2020-08-09 18:17:54 +02:00
parent d020005721
commit 11114ce004
2 changed files with 46 additions and 14 deletions

View File

@ -1,3 +1,4 @@
import json
import logging import logging
import os import os
import re import re
@ -12,6 +13,7 @@ from django.utils.timezone import make_aware, now
import mediafile import mediafile
import request import request
from bs4 import BeautifulSoup as bs from bs4 import BeautifulSoup as bs
from PIL import Image
from drupal2spip_lal.drupal import models as drupal from drupal2spip_lal.drupal import models as drupal
from drupal2spip_lal.spip import models as spip from drupal2spip_lal.spip import models as spip
@ -54,6 +56,16 @@ def strong_to_dl(html):
return '\n'.join(r) return '\n'.join(r)
def probe_video(path):
    """Return the video streams that ``ffprobe`` reports for *path*.

    Runs ``ffprobe -print_format json -show_streams <path>`` and parses the
    JSON document it prints on stdout.

    Args:
        path: Location of the media file, passed as-is to ``ffprobe``.

    Returns:
        A list with one dict per stream whose ``codec_type`` is ``"video"``,
        in the order ``ffprobe`` listed them (empty if there is none).

    Raises:
        OSError: ``ffprobe`` could not be started.
        ValueError: stdout was not valid JSON (``json.JSONDecodeError``).
        KeyError: the JSON document has no ``"streams"`` entry.
    """
    with Popen(
        ['ffprobe', '-print_format', 'json', '-show_streams', path],
        stdout=PIPE,
        stderr=PIPE,
    ) as proc:
        # Drain stdout *and* stderr together: reading only stdout while
        # stderr is also a pipe can hang forever once the child has filled
        # the stderr pipe buffer and blocks before closing stdout.
        output, _ = proc.communicate()
    streams = json.loads(output)['streams']
    # .get(): an entry without a codec_type is skipped rather than fatal.
    return [s for s in streams if s.get('codec_type') == 'video']
def download(src, filename, force_download): def download(src, filename, force_download):
if src and src.startswith('/'): if src and src.startswith('/'):
src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src) src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src)
@ -345,6 +357,10 @@ def convert_node(node, options):
fetch_and_remove_logo(article, force_download) fetch_and_remove_logo(article, force_download)
#
# Terms
#######
for term_node in node.termnode_set.all(): for term_node in node.termnode_set.all():
groupe, _ = spip.GroupesMots.objects.get_or_create( groupe, _ = spip.GroupesMots.objects.get_or_create(
titre=term_node.data.theme.name, titre=term_node.data.theme.name,
@ -363,6 +379,10 @@ def convert_node(node, options):
mot=mot, id_objet=article.pk, objet='article' mot=mot, id_objet=article.pk, objet='article'
) )
#
# Uploads
#########
images_mimes = [ images_mimes = [
'image/png', 'image/png',
'image/jpeg', 'image/jpeg',
@ -387,6 +407,8 @@ def convert_node(node, options):
'media': 'file', 'media': 'file',
'extension': extension, 'extension': extension,
'fichier': fichier, 'fichier': fichier,
}
document_defaults = {
'titre': upload.description, 'titre': upload.description,
'date': convert_timestamp(upload.file.timestamp), 'date': convert_timestamp(upload.file.timestamp),
'taille': upload.file.filesize, 'taille': upload.file.filesize,
@ -394,32 +416,41 @@ def convert_node(node, options):
'mode': 'image' if is_image else 'document', 'mode': 'image' if is_image else 'document',
} }
if is_image: if is_image:
document_attributes['media'] = 'image' document_defaults['media'] = 'image'
document_attributes['hauteur'] = 1 try:
document_attributes['largeur'] = 1 m = Image.open(path)
document_defaults['hauteur'] = m.height
document_defaults['largeur'] = m.width
except Exception as e:
logger.warn('Echec de lecture: {}'.format(e))
elif is_audio: elif is_audio:
document_attributes['media'] = 'audio' document_defaults['media'] = 'audio'
try: try:
m = mediafile.MediaFile(path) m = mediafile.MediaFile(path)
document_attributes['duree'] = m.length document_defaults['duree'] = m.length
if m.artist and m.album: if m.artist and m.album:
document_attributes['credits'] = '{} / {}'.format( document_defaults['credits'] = '{} / {}'.format(
m.artist, m.album m.artist, m.album
) )
elif m.artist: elif m.artist:
document_attributes['credits'] = m.artist document_defaults['credits'] = m.artist
elif m.album: elif m.album:
document_attributes['credits'] = m.album document_defaults['credits'] = m.album
except Exception as e: except Exception as e:
logger.warn('Echec de lecture: {}'.format(e)) logger.warn('Echec de lecture: {}'.format(e))
elif is_video: elif is_video:
document_attributes['media'] = 'video' document_defaults['media'] = 'video'
document_attributes['duree'] = 1 try:
document_attributes['hauteur'] = 1 m = probe_video(path)[0]
document_attributes['largeur'] = 1 document_defaults['duree'] = float(m.get('duration', 0))
document_defaults['hauteur'] = m.get('height', 0)
document_defaults['largeur'] = m.get('width', 0)
except Exception as e:
logger.warn('Echec de lecture: {}'.format(e))
document, _ = spip.Documents.objects.get_or_create( document, _ = spip.Documents.objects.update_or_create(
**document_attributes, defaults={'maj': now}, **document_attributes,
defaults={**document_defaults, 'maj': now},
) )
spip.DocumentsLiens.objects.get_or_create( spip.DocumentsLiens.objects.get_or_create(
document=document, document=document,

View File

@ -19,3 +19,4 @@ colorlog
# Media # Media
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
mediafile mediafile
pillow