feat(convert): ajoute les documents liés

This commit is contained in:
François Poulain 2020-08-09 15:58:14 +02:00
parent 9360cc134e
commit 734deddfc5
2 changed files with 95 additions and 11 deletions

View File

@ -6,6 +6,7 @@ from itertools import groupby
from subprocess import PIPE, Popen
from django.conf import settings
from django.db.models import F
from django.utils.timezone import make_aware, now
import request
@ -52,6 +53,26 @@ def strong_to_dl(html):
return '\n'.join(r)
def download(src, filename, force_download):
    """Fetch ``src`` and store it under ``settings.SPIP_LOGO_DIR``.

    A ``src`` starting with ``/`` is treated as site-relative and is
    prefixed with ``http://<settings.DRUPAL_FQDN>``.

    The download is skipped when the target file is already readable,
    unless ``force_download`` is true.

    :param src: URL (or site-relative path) of the resource to fetch.
    :param filename: target path relative to ``settings.SPIP_LOGO_DIR``;
        it may contain a sub-directory.
    :param force_download: when true, fetch again even if the file exists.
    """
    if src and src.startswith('/'):
        src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src)
    path = os.path.join(settings.SPIP_LOGO_DIR, filename)
    if not os.access(path, os.R_OK) or force_download:
        # fetch_document() passes "<extension>/<filename>", so the parent
        # directory may not exist yet; open() would then fail.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        r = request.get(src, stream=True)
        with open(path, 'wb') as fd:
            # Stream the body to disk chunk by chunk.
            for chunk in r.iter_content(chunk_size=128):
                fd.write(chunk)
def fetch_document(src, filename, force_download):
    """Download a document into a per-extension sub-directory.

    The extension is the text after the last ``.`` of ``filename``.
    When ``filename`` has no dot, or nothing after its last dot, the
    extension is ``'unknown'``.

    :param src: URL (or site-relative path) handed to ``download``.
    :param filename: name of the document; only used to derive the
        extension and the target path.
    :param force_download: forwarded to ``download``.
    :returns: ``(extension, cible)`` where ``cible`` is
        ``<extension>/<filename>``, the path passed to ``download``.
    """
    # rpartition tells "no dot at all" apart from "dot present":
    # split('.')[-1] would return the whole name for a dot-less filename.
    _, dot, suffix = filename.rpartition('.')
    extension = suffix if dot and suffix else 'unknown'
    cible = os.path.join(extension, filename)
    download(src, cible, force_download)
    return extension, cible
def fetch_and_remove_logo(article, force_download):
def fetch_logo(src):
"""
@ -60,13 +81,7 @@ def fetch_and_remove_logo(article, force_download):
"""
ext = src.split('.')[-1]
filename = 'arton{}.{}'.format(article.pk, ext)
path = os.path.join(settings.SPIP_LOGO_DIR, filename)
if not os.access(path, os.R_OK) or force_download:
r = request.get(src, stream=True)
with open(path, 'wb') as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
download(src, filename, force_download)
def remove_img(img):
has_siblings = [
@ -84,10 +99,7 @@ def fetch_and_remove_logo(article, force_download):
src = img and img.attrs.get('src', None)
if src and src.startswith('/'):
src = 'http://{}{}'.format(settings.DRUPAL_FQDN, src)
if src and re.match(r'^(https?)?://', src):
if src and re.match(r'^(https?)?://|^/', src):
fetch_logo(src)
remove_img(img)
article.descriptif = str(soup)
@ -101,6 +113,8 @@ def fetch_and_remove_logo(article, force_download):
article.texte = str(soup)
article.save()
else:
logger.warn('Article {} has ignored logo: {}'.format(article.pk, src))
def filter_html(html):
@ -331,3 +345,56 @@ def convert_node(node, options):
spip.MotsLiens.objects.get_or_create(
mot=mot, id_objet=article.pk, objet='article'
)
images_mimes = [
'image/png',
'image/jpeg',
'image/svg+xml',
'image/gif',
]
audio_mimes = ['application/ogg', 'audio/x-wav', 'audio/mpeg']
video_mimes = ['video/mp4']
for upload in node.upload_set.filter(
revision=F('node__published_revision')
):
is_audio = upload.file.filemime in audio_mimes
is_image = upload.file.filemime in images_mimes
is_video = upload.file.filemime in video_mimes
extension, fichier = fetch_document(
upload.file.filepath, upload.file.filename, force_download
)
document_attributes = {
'media': 'file',
'extension': extension,
'fichier': fichier,
'titre': upload.description,
'date': convert_timestamp(upload.file.timestamp),
'taille': upload.file.filesize,
'date_publication': convert_timestamp(upload.file.timestamp),
'mode': 'image' if is_image else 'document',
}
if is_image:
document_attributes['media'] = 'image'
document_attributes['hauteur'] = 1
document_attributes['largeur'] = 1
elif is_audio:
document_attributes['media'] = 'audio'
document_attributes['duree'] = 1
elif is_video:
document_attributes['media'] = 'video'
document_attributes['duree'] = 1
document_attributes['hauteur'] = 1
document_attributes['largeur'] = 1
document, _ = spip.Documents.objects.get_or_create(
**document_attributes, defaults={'maj': now},
)
spip.DocumentsLiens.objects.get_or_create(
document=document,
id_objet=article.pk,
objet='article',
rang_lien=upload.weight,
)

View File

@ -91,6 +91,9 @@ DB_RELATIONS = {
'auteurs_liens': {
'id_auteur': ('id_auteur', 'auteurs', 'auteur'),
},
'documents_liens': {
'id_document': ('id_document', 'documents', 'document'),
},
'mots': {
'id_groupe': ('id_groupe', 'groupes_mots', 'groupe'),
},
@ -154,6 +157,20 @@ DB_PARAMS = {
'auteurs_liens': {
'vu': {'default': 'non'},
},
'documents': {
'id_vignette': {'default': 0},
'brise': {'default': 0},
'credits': {'default': ''},
'distant': {'default': 'non'},
'duree': {'default': 0},
'hauteur': {'default': 0},
'largeur': {'default': 0},
'statut': {'default': 'prop'},
},
'documents_liens': {
'vu': {'default': 'non'},
'rang_lien': {'default': 0},
},
'groupes_mots': {
'unseul': {'default': 'non'},
'obligatoire': {'default': 'non'},