feat(convert): implante les filtres html autobr et autop de drupal
This commit is contained in:
parent
cc923c59a2
commit
c8a075fc7a
@ -110,10 +110,48 @@ def fetch_and_remove_logo(article, force_download):
|
||||
article.save()
|
||||
|
||||
|
||||
def sanitarize_html(html):
|
||||
def filter_html(html):
    """Emulate Drupal's line-break filters on an HTML fragment.

    Two passes are applied in order: ``auto_p`` turns blank-line separated
    text into ``<p>`` elements, then ``auto_br`` turns the remaining single
    newlines into ``<br>`` elements. Each pass parses its input with
    ``html.parser`` and returns the re-serialised markup.

    Text located inside ``<pre>``, ``<script>`` or ``<style>`` is left
    untouched because its whitespace is significant.

    :param html: HTML fragment to convert.
    :return: the converted fragment, as a string.
    """
    # Elements whose text must be kept verbatim: splitting it on newlines
    # would destroy preformatted content or corrupt embedded code.
    verbatim_tags = ['pre', 'script', 'style']

    def editable_strings(soup, pattern):
        """List the text nodes matching ``pattern`` outside verbatim tags."""
        # The result is a plain list, so callers may replace nodes while
        # looping over it.
        # NOTE(review): comment nodes presumably match ``string=`` as well
        # and would be turned into visible text -- confirm and skip them if
        # the imported content contains multi-line comments.
        return [
            node
            for node in soup.find_all(string=pattern)
            if node.find_parent(verbatim_tags) is None
        ]

    def auto_p(html):
        """Wrap each blank-line separated chunk of a text node in ``<p>``."""
        re_paragraph = re.compile(r'\s*\n\s*\n\s*')
        soup = bs(html, 'html.parser')
        for string in editable_strings(soup, re_paragraph):
            # An empty document is used as a container for the new tags.
            new_item = bs('', 'html.parser')
            # Empty pieces appear when the text starts or ends with a
            # paragraph separator; they must not produce empty paragraphs.
            for substring in [s for s in re_paragraph.split(string) if s]:
                p = new_item.new_tag('p')
                p.string = substring.strip()
                new_item.append(p)
            string.replace_with(new_item)
        return str(soup)

    def auto_br(html):
        """Replace newlines inside text nodes with ``<br>`` elements."""
        re_break = re.compile(r'\s*\n\s*')
        soup = bs(html, 'html.parser')
        for string in editable_strings(soup, re_break):
            new_item = bs('', 'html.parser')
            pieces = [s for s in re_break.split(string) if s]
            for index, substring in enumerate(pieces):
                # A break goes between pieces, never before the first one.
                if index:
                    br = new_item.new_tag('br')
                    new_item.append(br)
                new_item.append(substring.strip())
            string.replace_with(new_item)
        return str(soup)

    # Paragraphs first: once blank lines are consumed, only single newlines
    # are left for the <br> pass.
    html = auto_p(html)
    html = auto_br(html)
    return html
|
||||
|
||||
|
||||
def sanitarize_html(html, node_fmt):
    """Clean up a Drupal HTML fragment according to its input format.

    The fragment always goes through ``strong_to_dl`` first. Content in the
    ``'Filtered HTML'`` format is then passed to ``filter_html`` and
    re-serialised through ``html.parser``. Any other format is returned as
    produced by ``strong_to_dl``.

    :param html: HTML fragment to clean up.
    :param node_fmt: name of the Drupal input format of the fragment.
    :return: the cleaned-up fragment, as a string.
    :raises NotImplementedError: if ``node_fmt`` is ``'PHP code'``.
    """
    html = strong_to_dl(html)

    # PHP nodes would have to be executed to get their markup.
    if node_fmt == 'PHP code':
        raise NotImplementedError("Ce node est au format PHP.")

    if node_fmt == 'Filtered HTML':
        html = filter_html(html)
        # NOTE(review): this re-serialisation was guarded by a second,
        # identical 'Filtered HTML' test; it may have been intended for
        # another format -- confirm before changing the condition.
        html = str(bs(html, 'html.parser'))

    return html
|
||||
|
||||
|
||||
def convert_node(node, options):
|
||||
@ -139,7 +177,8 @@ def convert_node(node, options):
|
||||
or len(set(spip_urls.values_list('id_objet', flat=True))) != 1
|
||||
or spip.Urls.objects.filter(
|
||||
type='article', id_objet=article_id
|
||||
).count() != spip_urls.count()
|
||||
).count()
|
||||
!= spip_urls.count()
|
||||
):
|
||||
# incohérence dans les urls
|
||||
raise ValueError(
|
||||
@ -147,15 +186,19 @@ def convert_node(node, options):
|
||||
"de s'assurer qu'il s'agisse d'un import prééexistant."
|
||||
)
|
||||
|
||||
node_fmt = node.published_revision.format.name
|
||||
|
||||
article = None
|
||||
article_attributes = {
|
||||
'date': convert_timestamp(node.published_revision.timestamp),
|
||||
'date_modif': convert_timestamp(node.changed),
|
||||
'date_redac': convert_timestamp(node.created),
|
||||
'descriptif': sanitarize_html(node.published_revision.teaser),
|
||||
'descriptif': sanitarize_html(
|
||||
node.published_revision.teaser, node_fmt
|
||||
),
|
||||
'maj': convert_timestamp(node.changed),
|
||||
'statut': 'publie' if node.status else 'prepa',
|
||||
'texte': sanitarize_html(node.published_revision.body),
|
||||
'texte': sanitarize_html(node.published_revision.body, node_fmt),
|
||||
'titre': node.title,
|
||||
}
|
||||
if not spip_urls.exists():
|
||||
|
@ -5,7 +5,6 @@ from django.core.management.base import BaseCommand
|
||||
from drupal2spip_lal.base.convert import convert_node
|
||||
from drupal2spip_lal.drupal.models import Node
|
||||
|
||||
|
||||
logger = logging.getLogger('drupal2spip_lal')
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user