From 578e02bb3e770e1bd8583642c399059d7a5a257f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Poulain?=
', html)
def auto_br(html):
re_break = re.compile(r'\s*\n\s*')
+ return re_break.sub('
', html)
+
+ def remove_spare_br(html):
soup = bs(html, 'html.parser')
- for string in soup.find_all(string=re_break):
- new_item = bs('', 'html.parser')
- for index, substring in enumerate([s for s in re_break.split(string) if s]):
- if index:
- br = new_item.new_tag('br')
- new_item.append(br)
- new_item.append(substring.strip())
- string.replace_with(new_item)
+ # ou dernier premier élément d'un parent de type bloc
+ def spare_br(elem):
+ return elem.name == 'br' and getattr(elem.parent, 'name', None) in block_elems and not (elem.next_sibling and elem.previous_sibling)
+ [elem.clear() for elem in soup.find_all(spare_br)]
return str(soup)
+ html = explicit_double_br(html)
html = auto_p(html)
html = auto_br(html)
+ html = remove_spare_br(html)
return html
@@ -146,12 +146,14 @@ def sanitarize_html(html, node_fmt):
if node_fmt == 'PHP code':
raise NotImplementedError("Ce node est au format PHP.")
- if node_fmt == 'Filtered HTML':
+ elif node_fmt == 'Filtered HTML':
html = filter_html(html)
- if node_fmt == 'Filtered HTML':
- html = str(bs(html, 'html.parser'))
+ elif node_fmt == 'Full HTML':
+ pass
+ else:
+ raise NotImplementedError("Ce node est dans un format inconnu.")
- return html
+ return str(bs(html, 'html.parser'))
def convert_node(node, options):