Merge pull request #83 from dlax/apos

Replace &apos; by "'" before parsing HTML
2024-06-30 06:35:24 +00:00 · 2019-01-02 11:39:21 +01:00 · 2019-01-02 11:39:21 +01:00 · fc57d2695a
commit fc57d2695a
parent 14a580bc19 0f6bd920c3
3 changed files with 5 additions and 4 deletions
--- a/tests/test_console.py
+++ b/tests/test_console.py
@@ -126,7 +126,7 @@ def test_timeline(mock_get, monkeypatch, capsys):
            'username': 'fz'
        },
        'created_at': '2017-04-12T15:53:18.174Z',
-        'content': "<p>The computer can't tell you the emotional story. It can give you the exact mathematical design, but what's missing is the eyebrows.</p>",
+        'content': "<p>The computer can&apos;t tell you the emotional story. It can give you the exact mathematical design, but what's missing is the eyebrows.</p>",
        'reblog': None,
    }])
@@ -136,6 +136,7 @@ def test_timeline(mock_get, monkeypatch, capsys):
    out, err = capsys.readouterr()
    assert "The computer can't tell you the emotional story." in out
    assert "but what's missing is the eyebrows." in out
    assert "Frank Zappa" in out
    assert "@fz" in out
--- a/toot/output.py
+++ b/toot/output.py
@@ -148,8 +148,8 @@ def print_timeline(items):
        content = item['reblog']['content'] if item['reblog'] else item['content']
        reblogged = item['reblog']['account']['username'] if item['reblog'] else None
-        soup = BeautifulSoup(content, "html.parser")
+        soup = BeautifulSoup(content.replace('&apos;', "'"), "html.parser")
-        text = soup.get_text().replace('&apos;', "'")
+        text = soup.get_text()
        time = datetime.strptime(item['created_at'], "%Y-%m-%dT%H:%M:%S.%fZ")
        return {
--- a/toot/utils.py
+++ b/toot/utils.py
@@ -12,7 +12,7 @@ from toot.exceptions import ConsoleError
 def get_text(html):
    """Converts html to text, strips all tags."""
-    text = BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'")
+    text = BeautifulSoup(html.replace('&apos;', "'"), "html.parser").get_text()
    return unicodedata.normalize('NFKC', text)