mirror of
https://github.com/ihabunek/toot.git
synced 2025-02-02 15:07:51 -05:00
Merge pull request #83 from dlax/apos
Replace &apos; by "'" before parsing HTML
This commit is contained in:
commit
fc57d2695a
@ -126,7 +126,7 @@ def test_timeline(mock_get, monkeypatch, capsys):
|
|||||||
'username': 'fz'
|
'username': 'fz'
|
||||||
},
|
},
|
||||||
'created_at': '2017-04-12T15:53:18.174Z',
|
'created_at': '2017-04-12T15:53:18.174Z',
|
||||||
'content': "<p>The computer can't tell you the emotional story. It can give you the exact mathematical design, but what's missing is the eyebrows.</p>",
|
'content': "<p>The computer can't tell you the emotional story. It can give you the exact mathematical design, but what's missing is the eyebrows.</p>",
|
||||||
'reblog': None,
|
'reblog': None,
|
||||||
}])
|
}])
|
||||||
|
|
||||||
@ -136,6 +136,7 @@ def test_timeline(mock_get, monkeypatch, capsys):
|
|||||||
|
|
||||||
out, err = capsys.readouterr()
|
out, err = capsys.readouterr()
|
||||||
assert "The computer can't tell you the emotional story." in out
|
assert "The computer can't tell you the emotional story." in out
|
||||||
|
assert "but what's missing is the eyebrows." in out
|
||||||
assert "Frank Zappa" in out
|
assert "Frank Zappa" in out
|
||||||
assert "@fz" in out
|
assert "@fz" in out
|
||||||
|
|
||||||
|
@ -148,8 +148,8 @@ def print_timeline(items):
|
|||||||
content = item['reblog']['content'] if item['reblog'] else item['content']
|
content = item['reblog']['content'] if item['reblog'] else item['content']
|
||||||
reblogged = item['reblog']['account']['username'] if item['reblog'] else None
|
reblogged = item['reblog']['account']['username'] if item['reblog'] else None
|
||||||
|
|
||||||
soup = BeautifulSoup(content, "html.parser")
|
soup = BeautifulSoup(content.replace('&apos;', "'"), "html.parser")
|
||||||
text = soup.get_text().replace('&apos;', "'")
|
text = soup.get_text()
|
||||||
time = datetime.strptime(item['created_at'], "%Y-%m-%dT%H:%M:%S.%fZ")
|
time = datetime.strptime(item['created_at'], "%Y-%m-%dT%H:%M:%S.%fZ")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -12,7 +12,7 @@ from toot.exceptions import ConsoleError
|
|||||||
|
|
||||||
def get_text(html):
|
def get_text(html):
|
||||||
"""Converts html to text, strips all tags."""
|
"""Converts html to text, strips all tags."""
|
||||||
text = BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'")
|
text = BeautifulSoup(html.replace('&apos;', "'"), "html.parser").get_text()
|
||||||
|
|
||||||
return unicodedata.normalize('NFKC', text)
|
return unicodedata.normalize('NFKC', text)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user