mirror of
https://github.com/ihabunek/toot.git
synced 2024-11-03 04:17:21 -05:00
Normalize unicode
This commit is contained in:
parent
cb1f7b4e61
commit
2ecc6a28c6
@@ -2,6 +2,7 @@
|
||||
|
||||
import re
|
||||
import socket
|
||||
import unicodedata
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
@@ -10,7 +11,9 @@ from toot.exceptions import ConsoleError
|
||||
|
||||
def get_text(html):
|
||||
"""Converts html to text, strips all tags."""
|
||||
return BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'")
|
||||
text = BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'")
|
||||
|
||||
return unicodedata.normalize('NFKC', text)
|
||||
|
||||
|
||||
def parse_html(html):
|
||||
|
Loading…
Reference in New Issue
Block a user