From 2ecc6a28c6b1cd2efd4bd94d801954e87ab1b320 Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Sun, 21 Jan 2018 16:39:40 +0100
Subject: [PATCH] Normalize unicode

---
 toot/utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/toot/utils.py b/toot/utils.py
index b7ae649..dc22bfb 100644
--- a/toot/utils.py
+++ b/toot/utils.py
@@ -2,6 +2,7 @@
 
 import re
 import socket
+import unicodedata
 
 from bs4 import BeautifulSoup
 
@@ -10,7 +11,9 @@ from toot.exceptions import ConsoleError
 
 def get_text(html):
     """Converts html to text, strips all tags."""
-    return BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'")
+    text = BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'")
+
+    return unicodedata.normalize('NFKC', text)
 
 
 def parse_html(html):