feat(convert): implante les filtres html autobr et autop de drupal
This commit is contained in:
parent
cc923c59a2
commit
c8a075fc7a
@ -110,10 +110,48 @@ def fetch_and_remove_logo(article, force_download):
|
|||||||
article.save()
|
article.save()
|
||||||
|
|
||||||
|
|
||||||
def sanitarize_html(html):
|
def filter_html(html):
    """Emulate Drupal's automatic paragraph / line-break filter on *html*.

    Two passes are applied in order, each one re-parsing the markup with
    BeautifulSoup's ``html.parser``:

    1. ``auto_p``: every text node containing a blank line is split on
       blank lines and each non-empty piece is wrapped in its own ``<p>``.
    2. ``auto_br``: every text node containing a newline is split on
       newlines and the non-empty pieces are joined with ``<br>`` tags.

    Pieces are stripped and empty pieces are dropped, so a text node made
    only of whitespace and newlines is removed from the tree.

    Text that must stay verbatim is left untouched by both passes:
    comments, CDATA sections, doctypes and other special strings, as well
    as any text located inside ``<pre>``, ``<script>`` or ``<style>``.

    Args:
        html: HTML fragment, as a string.

    Returns:
        The converted markup, serialised back to a string.
    """
    # Function-scope import: only this block needs the class, and the
    # file-level import section is left as it is.
    from bs4.element import PreformattedString

    # Elements whose text content must never be reformatted.
    verbatim_tags = ['pre', 'script', 'style']

    def is_convertible(string):
        """Tell whether a text node may be rewritten by the filters."""
        # find_all(string=...) also yields Comment, CData, Doctype, ...;
        # rewriting those would turn hidden content into visible markup.
        if isinstance(string, PreformattedString):
            return False
        return string.find_parent(verbatim_tags) is None

    def auto_p(html):
        """Wrap blank-line separated chunks of text in ``<p>`` tags."""
        re_paragraph = re.compile(r'\s*\n\s*\n\s*')
        soup = bs(html, 'html.parser')
        for string in soup.find_all(string=re_paragraph):
            if not is_convertible(string):
                continue
            # The replacement nodes are collected in an empty document
            # which then takes the place of the original text node.
            new_item = bs('', 'html.parser')
            for substring in [s for s in re_paragraph.split(string) if s]:
                p = new_item.new_tag('p')
                p.string = substring.strip()
                new_item.append(p)
            string.replace_with(new_item)
        return str(soup)

    def auto_br(html):
        """Turn the remaining newlines inside text into ``<br>`` tags."""
        re_break = re.compile(r'\s*\n\s*')
        soup = bs(html, 'html.parser')
        for string in soup.find_all(string=re_break):
            if not is_convertible(string):
                continue
            new_item = bs('', 'html.parser')
            pieces = [s for s in re_break.split(string) if s]
            for index, substring in enumerate(pieces):
                # A <br> goes between pieces, never before the first one.
                if index:
                    br = new_item.new_tag('br')
                    new_item.append(br)
                new_item.append(substring.strip())
            string.replace_with(new_item)
        return str(soup)

    # Paragraphs first: auto_br would otherwise consume the blank lines
    # that auto_p relies on.
    html = auto_p(html)
    html = auto_br(html)
    return html
|
||||||
|
|
||||||
|
|
||||||
|
def sanitarize_html(html, node_fmt):
    """Clean a piece of Drupal node markup according to its input format.

    Args:
        html: markup to clean (callers pass a revision's teaser or body).
        node_fmt: name of the Drupal input format of the node.

    Returns:
        The cleaned markup, as a string.

    Raises:
        NotImplementedError: if the node uses the 'PHP code' format.
    """
    html = strong_to_dl(html)

    if node_fmt == 'PHP code':
        raise NotImplementedError("Ce node est au format PHP.")

    if node_fmt == 'Filtered HTML':
        # filter_html() already returns markup re-serialised by the parser.
        html = filter_html(html)
    else:
        # NOTE(review): this normalisation used to sit behind a second,
        # duplicated `node_fmt == 'Filtered HTML'` test, so it never ran
        # for any other format. It now applies to every remaining format;
        # confirm against the list of formats actually present in the
        # Drupal database.
        html = str(bs(html, 'html.parser'))

    return html
|
||||||
|
|
||||||
|
|
||||||
def convert_node(node, options):
|
def convert_node(node, options):
|
||||||
@ -139,7 +177,8 @@ def convert_node(node, options):
|
|||||||
or len(set(spip_urls.values_list('id_objet', flat=True))) != 1
|
or len(set(spip_urls.values_list('id_objet', flat=True))) != 1
|
||||||
or spip.Urls.objects.filter(
|
or spip.Urls.objects.filter(
|
||||||
type='article', id_objet=article_id
|
type='article', id_objet=article_id
|
||||||
).count() != spip_urls.count()
|
).count()
|
||||||
|
!= spip_urls.count()
|
||||||
):
|
):
|
||||||
# incohérence dans les urls
|
# incohérence dans les urls
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
@ -147,15 +186,19 @@ def convert_node(node, options):
|
|||||||
"de s'assurer qu'il s'agisse d'un import prééexistant."
|
"de s'assurer qu'il s'agisse d'un import prééexistant."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
node_fmt = node.published_revision.format.name
|
||||||
|
|
||||||
article = None
|
article = None
|
||||||
article_attributes = {
|
article_attributes = {
|
||||||
'date': convert_timestamp(node.published_revision.timestamp),
|
'date': convert_timestamp(node.published_revision.timestamp),
|
||||||
'date_modif': convert_timestamp(node.changed),
|
'date_modif': convert_timestamp(node.changed),
|
||||||
'date_redac': convert_timestamp(node.created),
|
'date_redac': convert_timestamp(node.created),
|
||||||
'descriptif': sanitarize_html(node.published_revision.teaser),
|
'descriptif': sanitarize_html(
|
||||||
|
node.published_revision.teaser, node_fmt
|
||||||
|
),
|
||||||
'maj': convert_timestamp(node.changed),
|
'maj': convert_timestamp(node.changed),
|
||||||
'statut': 'publie' if node.status else 'prepa',
|
'statut': 'publie' if node.status else 'prepa',
|
||||||
'texte': sanitarize_html(node.published_revision.body),
|
'texte': sanitarize_html(node.published_revision.body, node_fmt),
|
||||||
'titre': node.title,
|
'titre': node.title,
|
||||||
}
|
}
|
||||||
if not spip_urls.exists():
|
if not spip_urls.exists():
|
||||||
|
@ -5,7 +5,6 @@ from django.core.management.base import BaseCommand
|
|||||||
from drupal2spip_lal.base.convert import convert_node
|
from drupal2spip_lal.base.convert import convert_node
|
||||||
from drupal2spip_lal.drupal.models import Node
|
from drupal2spip_lal.drupal.models import Node
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger('drupal2spip_lal')
|
logger = logging.getLogger('drupal2spip_lal')
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user