diff --git a/drupal2spip_lal/base/convert.py b/drupal2spip_lal/base/convert.py
index b53c046..76aaa96 100644
--- a/drupal2spip_lal/base/convert.py
+++ b/drupal2spip_lal/base/convert.py
@@ -118,7 +118,49 @@ def filter_html(html):
except Exception as e:
raise ValueError("Echec de auto_p: {}".format(e))
+    def auto_a(html):
+        """Turn bare URLs and e-mail addresses in text nodes into links.
+
+        Text inside <a>, <script> or <style>, and non-text nodes such as
+        comments, are left alone. Returns the re-serialised HTML string.
+        """
+        from bs4 import NavigableString  # assumes ``bs`` comes from bs4
+
+        soup = bs(html, 'html.parser')
+        schemes = '|'.join((
+            'http', 'https', 'ftp', 'news', 'nntp', 'tel', 'telnet',
+            'mailto', 'irc', 'ssh', 'sftp', 'webcal', 'rtsp',
+        ))
+        # The scheme is required; trailing sentence punctuation is excluded.
+        link_pattern = re.compile(
+            r'\b(?:%s)s?://(?:\w+\.)+\w+(?:\S*[^\s.,;:!?])?' % schemes
+        )
+        email_pattern = re.compile(r'\b[\w\.\+_-]+@(?:\w+\.)+\w+\b')
+
+        def linkify(pattern, prefix):
+            """Wrap each match of *pattern* in an anchor to prefix + match."""
+            for node in soup.find_all(string=pattern):
+                skip = node.find_parent(['a', 'script', 'style'])
+                if skip is not None or type(node) is not NavigableString:
+                    continue
+                text, pos = str(node), 0
+                for m in pattern.finditer(text):
+                    # Build real nodes instead of re-parsing markup, so
+                    # characters such as "<" or "&" in the text stay text.
+                    link = soup.new_tag('a', href=prefix + m.group(0))
+                    link.string = m.group(0)
+                    node.insert_before(soup.new_string(text[pos:m.start()]))
+                    node.insert_before(link)
+                    pos = m.end()
+                node.replace_with(soup.new_string(text[pos:]))
+
+        # URLs first, so an address inside a URL is not linked separately.
+        linkify(link_pattern, '')
+        linkify(email_pattern, 'mailto:')
+        return str(soup)
+
html = auto_p(html)
+ html = auto_a(html)
return html