# Patch to drupal2spip_lal/base/convert.py (76aaa96..cdcb28f).
# It adds ``footnotes`` (below) between ``filter_html`` and
# ``sanitarize_html``, and changes the end of ``sanitarize_html`` from
#     return str(bs(html, 'html.parser'))
# to
#     html = footnotes(html)
#     return html

# Bracket-style footnote markers: ``[fn]``, ``[/fn]``, ``[footnote value=x]``.
_FN_BRACKET_RE = re.compile(r'\[\s*(/?)\s*(fn|footnote)\s*(\s[^\]]*)?\]')

# NOTE(review): in the reviewed text these three patterns were empty
# (``re.compile(r'')``), apparently because the markup inside them had been
# stripped.  They are reconstructed from the ``spip:fn`` tags built in
# ``footnotes`` and from the replacement strings -- confirm against the
# repository.
_SPIP_FN_OPEN_RE = re.compile(r'<spip:fn>')
_SPIP_FN_CLOSE_RE = re.compile(r'</spip:fn>')
_SPIP_FN_VALUE_RE = re.compile(r'<spip:fn value="([^"]*)">')


def footnotes(html):
    """Convert ``fn``/``footnote`` markers in *html* to SPIP note syntax.

    Both the bracket form (``[fn]...[/fn]``) and the tag form
    (``<fn>...</fn>``) are handled, for ``fn`` as well as ``footnote``.

    * A footnote without a (non-empty) ``value`` attribute becomes
      ``[[...]]`` and consumes the next automatic number.
    * A footnote with a ``value`` becomes ``[[<value>...]]``.
    * A footnote whose ``value`` was already used (explicitly, or as an
      automatic number) is emptied first, giving ``[[<value>]]``.

    Args:
        html: markup to convert, as a string.

    Returns:
        The markup re-serialized by BeautifulSoup (``html.parser``) with
        every footnote rewritten as described above.
    """
    # Turn the bracket form into real tags so a single parser pass sees both.
    html = _FN_BRACKET_RE.sub(r'<\1\2\3>', html)
    soup = bs(html, 'html.parser')

    index = 1
    seen_values = set()
    # find_all() returns a materialized result list, so editing the tree
    # inside the loop is safe.
    for fn in soup.find_all(['fn', 'footnote']):
        value = fn.attrs.get('value') or ''
        if value and value in seen_values:
            # Repeated reference: keep only the label, drop the content.
            fn.clear()
        elif value:
            seen_values.add(value)
        else:
            # Unlabelled note: remember the number it implicitly takes so
            # that a later value="<n>" is treated as a repeat.
            seen_values.add(str(index))
            index += 1

        if value:
            wrapper = soup.new_tag('spip:fn', value=value)
        else:
            wrapper = soup.new_tag('spip:fn')
        fn.wrap(wrapper)
        # Unwrap the matched tag itself.  Looking it up again by name
        # (``wrapper.fn``) finds nothing for <footnote> elements and would
        # raise AttributeError.
        fn.unwrap()

    html = str(soup)
    # SPIP marks notes with square and angle brackets.  Those cannot be
    # built inside the soup (text nodes get entity-escaped on output), so a
    # placeholder tag is emitted and rewritten on the serialized string.
    html = _SPIP_FN_OPEN_RE.sub(r'[[', html)
    html = _SPIP_FN_CLOSE_RE.sub(r']]', html)
    html = _SPIP_FN_VALUE_RE.sub(r'[[<\1>', html)
    return html