feat(convert): ajoute les liens auto dans le filtrage html
This commit is contained in:
parent
7ce36668ea
commit
e4816bc2ee
@ -118,7 +118,49 @@ def filter_html(html):
|
||||
except Exception as e:
|
||||
raise ValueError("Echec de auto_p: {}".format(e))
|
||||
|
||||
def auto_a(html):
    """Wrap bare URLs and e-mail addresses found in *html* in ``<a>`` tags.

    The markup is parsed, every text node containing a URL or an e-mail
    address is split around the matches, and each match is replaced by
    ``<a class="auto-a" href="...">match</a>``.  E-mail addresses get a
    ``mailto:`` href.  Text already inside an ``<a>`` element (at any depth)
    or inside ``<script>`` / ``<style>`` is left untouched.

    Args:
        html: HTML markup to process.

    Returns:
        The serialized markup with the automatic links added.
    """
    soup = bs(html, 'html.parser')

    def linkify(pattern, href_prefix=''):
        """Replace every *pattern* match in the soup's text by an anchor."""
        # find_all() returns a list snapshot, so the tree can be edited
        # while iterating over the result.
        for text_node in soup.find_all(string=pattern):
            # Look at every ancestor, not only the direct parent, so that
            # <a><b>text</b></a> does not end up with nested anchors and
            # script/style content is never altered.
            if text_node.find_parent(['a', 'script', 'style']) is not None:
                continue

            text = str(text_node)
            cursor = 0
            for match in pattern.finditer(text):
                if match.start() > cursor:
                    text_node.insert_before(
                        soup.new_string(text[cursor:match.start()])
                    )
                target = match.group(0)
                # Build the anchor through the soup API instead of
                # re-parsing an interpolated HTML string: the text node
                # holds *decoded* text, so re-parsing it would turn an
                # escaped "&lt;tag&gt;" into real markup.
                anchor = soup.new_tag('a')
                anchor['class'] = 'auto-a'
                anchor['href'] = href_prefix + target
                anchor.string = target
                text_node.insert_before(anchor)
                cursor = match.end()
            if cursor < len(text):
                text_node.insert_before(soup.new_string(text[cursor:]))
            text_node.extract()

    protocols = [
        'http',
        'https',
        'ftp',
        'news',
        'nntp',
        'tel',
        'telnet',
        'mailto',
        'irc',
        'ssh',
        'sftp',
        'webcal',
        'rtsp',
    ]
    # The scheme is mandatory: a bare "://host.tld" is not a usable link.
    # Host labels may contain hyphens (e.g. "my-site.com").
    link_pattern = re.compile(
        r'\b(?:{})s?://(?:[\w-]+\.)+\w+\S*'.format('|'.join(protocols))
    )
    email_pattern = re.compile(r'\b[\w\.\+_-]+@(?:[\w-]+\.)+\w+\b')

    # URLs are handled first so that an address embedded in a URL
    # (e.g. "https://host.tld/?email=me@host.tld") stays part of that URL;
    # the e-mail pass then skips it because it already sits inside an <a>.
    linkify(link_pattern)
    linkify(email_pattern, href_prefix='mailto:')

    return str(soup)
|
||||
|
||||
html = auto_p(html)
|
||||
html = auto_a(html)
|
||||
return html
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user