diff --git a/setup.py b/setup.py
index 5946faf..7ed6c15 100644
--- a/setup.py
+++ b/setup.py
@@ -39,12 +39,15 @@ setup(
"wcwidth>=0.1.7",
"urwid>=2.0.0,<3.0",
"tomlkit>=0.10.0,<1.0",
- "html2text>=2020.1.16"
],
extras_require={
# Required to display rich text in the TUI
"richtext": [
- "urwidgets>=0.1,<0.2"
+ "urwidgets>=0.1,<0.2",
+ ],
+ "markdown": [
+ "pypandoc>=1.12.0,<2.0",
+ "pypandoc-binary>=1.12.0,<2.0",
],
"dev": [
"coverage",
diff --git a/tests/test_console.py b/tests/test_console.py
index 5eeb171..1d321df 100644
--- a/tests/test_console.py
+++ b/tests/test_console.py
@@ -152,210 +152,6 @@ def test_timeline(mock_get, monkeypatch, capsys):
assert err == ""
-@mock.patch('toot.http.get')
-def test_timeline_html_content(mock_get, monkeypatch, capsys):
- mock_get.return_value = MockResponse([{
- 'id': '111111111111111111',
- 'account': {
- 'display_name': 'Frank Zappa 🎸',
- 'acct': 'fz'
- },
- 'created_at': '2017-04-12T15:53:18.174Z',
- 'content': "
HTML Render Test
emphasized
underlined
bold
bold and italic
strikethrough
regular text
Code block:
10 PRINT \"HELLO WORLD\"
20 GOTO 10
Something blockquoted here. The indentation is maintained as the text line wraps.
- List item
- Nested item
- Another nested
- Another list item.
- Something else nested
- And a last nested
Blockquote
- List in BQ
- List item 2 in BQ
#hashtag #test
https://a.com text after link
",
- 'reblog': None,
- 'in_reply_to_id': None,
- 'media_attachments': [],
- }])
-
- console.run_command(app, user, 'timeline', ['--once'])
-
- mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10})
-
- out, err = capsys.readouterr()
- lines = out.split("\n")
- reference = [
- "────────────────────────────────────────────────────────────────────────────────────────────────────",
- "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC",
- "",
- "## HTML Render Test",
- "",
- " _emphasized_ ",
- " _underlined_ ",
- " **bold** ",
- " ** _bold and italic_** ",
- " ~~strikethrough~~ ",
- "regular text",
- "",
- "Code block:",
- "",
- " ",
- " 10 PRINT \"HELLO WORLD\" ",
- " 20 GOTO 10 ",
- " ",
- "> Something blockquoted here. The indentation is maintained as the text line wraps.",
- " 1. List item",
- " • Nested item",
- " • Another nested ",
- " 2. Another list item. ",
- " 1. Something else nested",
- " 2. And a last nested",
- "",
- "> Blockquote",
- "> 1. List in BQ",
- "> 2. List item 2 in BQ",
- ">",
- "",
- "#hashtag #test ",
- "https://a.com text after link",
- "",
- "ID 111111111111111111 ",
- "────────────────────────────────────────────────────────────────────────────────────────────────────",
- "",
- ]
-
- assert len(lines) == len(reference)
- for index, line in enumerate(lines):
- assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}"
-
- assert err == ""
-
-
-@mock.patch('toot.http.get')
-def test_timeline_html_content(mock_get, monkeypatch, capsys):
- mock_get.return_value = MockResponse([{
- 'id': '111111111111111111',
- 'account': {
- 'display_name': 'Frank Zappa 🎸',
- 'acct': 'fz'
- },
- 'created_at': '2017-04-12T15:53:18.174Z',
- 'content': "HTML Render Test
emphasized
underlined
bold
bold and italic
strikethrough
regular text
Code block:
10 PRINT \"HELLO WORLD\"
20 GOTO 10
Something blockquoted here. The indentation is maintained as the text line wraps.
- List item
- Nested item
- Another nested
- Another list item.
- Something else nested
- And a last nested
Blockquote
- List in BQ
- List item 2 in BQ
#hashtag #test
https://a.com text after link
",
- 'reblog': None,
- 'in_reply_to_id': None,
- 'media_attachments': [],
- }])
-
- console.run_command(app, user, 'timeline', ['--once'])
-
- mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10})
-
- out, err = capsys.readouterr()
- lines = out.split("\n")
- reference = [
- "────────────────────────────────────────────────────────────────────────────────────────────────────",
- "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC",
- "",
- "## HTML Render Test",
- "",
- " _emphasized_ ",
- " _underlined_ ",
- " **bold** ",
- " ** _bold and italic_** ",
- " ~~strikethrough~~ ",
- "regular text",
- "",
- "Code block:",
- "",
- " ",
- " 10 PRINT \"HELLO WORLD\" ",
- " 20 GOTO 10 ",
- " ",
- "> Something blockquoted here. The indentation is maintained as the text line wraps.",
- " 1. List item",
- " • Nested item",
- " • Another nested ",
- " 2. Another list item. ",
- " 1. Something else nested",
- " 2. And a last nested",
- "",
- "> Blockquote",
- "> 1. List in BQ",
- "> 2. List item 2 in BQ",
- ">",
- "",
- "#hashtag #test ",
- "https://a.com text after link",
- "",
- "ID 111111111111111111 ",
- "────────────────────────────────────────────────────────────────────────────────────────────────────",
- "",
- ]
-
- assert len(lines) == len(reference)
- for index, line in enumerate(lines):
- assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}"
-
- assert err == ""
-
-
-@mock.patch('toot.http.get')
-def test_timeline_html_content(mock_get, monkeypatch, capsys):
- mock_get.return_value = MockResponse([{
- 'id': '111111111111111111',
- 'account': {
- 'display_name': 'Frank Zappa 🎸',
- 'acct': 'fz'
- },
- 'created_at': '2017-04-12T15:53:18.174Z',
- 'content': "HTML Render Test
emphasized
underlined
bold
bold and italic
strikethrough
regular text
Code block:
10 PRINT \"HELLO WORLD\"
20 GOTO 10
Something blockquoted here. The indentation is maintained as the text line wraps.
- List item
- Nested item
- Another nested
- Another list item.
- Something else nested
- And a last nested
Blockquote
- List in BQ
- List item 2 in BQ
#hashtag #test
https://a.com text after link
",
- 'reblog': None,
- 'in_reply_to_id': None,
- 'media_attachments': [],
- }])
-
- console.run_command(app, user, 'timeline', ['--once'])
-
- mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10})
-
- out, err = capsys.readouterr()
- lines = out.split("\n")
- reference = [
- "────────────────────────────────────────────────────────────────────────────────────────────────────",
- "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC",
- "",
- "## HTML Render Test",
- "",
- " _emphasized_ ",
- " _underlined_ ",
- " **bold** ",
- " ** _bold and italic_** ",
- " ~~strikethrough~~ ",
- "regular text",
- "",
- "Code block:",
- "",
- " ",
- " 10 PRINT \"HELLO WORLD\" ",
- " 20 GOTO 10 ",
- " ",
- "> Something blockquoted here. The indentation is maintained as the text line wraps.",
- " 1. List item",
- " • Nested item",
- " • Another nested ",
- " 2. Another list item. ",
- " 1. Something else nested",
- " 2. And a last nested",
- "",
- "> Blockquote",
- "> 1. List in BQ",
- "> 2. List item 2 in BQ",
- ">",
- "",
- "#hashtag #test ",
- "https://a.com text after link",
- "",
- "ID 111111111111111111 ",
- "────────────────────────────────────────────────────────────────────────────────────────────────────",
- "",
- ]
-
- assert len(lines) == len(reference)
- for index, line in enumerate(lines):
- assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}"
-
- assert err == ""
-
-
@mock.patch('toot.http.get')
def test_timeline_with_re(mock_get, monkeypatch, capsys):
mock_get.return_value = MockResponse([{
diff --git a/toot/output.py b/toot/output.py
index 7bdc333..9bf7d91 100644
--- a/toot/output.py
+++ b/toot/output.py
@@ -2,11 +2,11 @@ import os
import re
import sys
import textwrap
-import html2text
from functools import lru_cache
from toot import settings
from toot.utils import get_text
+from toot.richtext import html_to_text
from toot.entities import Account, Instance, Notification, Poll, Status
from toot.wcstring import wc_wrap
from typing import List
@@ -321,20 +321,9 @@ def print_status(status: Status, width: int = 80):
def print_html(text, width=80):
- h2t = html2text.HTML2Text()
-
- h2t.body_width = width
- h2t.single_line_break = True
- h2t.ignore_links = True
- h2t.wrap_links = True
- h2t.wrap_list_items = True
- h2t.wrap_tables = True
- h2t.unicode_snob = True
- h2t.ul_item_mark = "\N{bullet}"
- markdown = h2t.handle(text).strip()
-
+ markdown = "\n".join(html_to_text(text, columns=width, highlight_tags=False))
print_out("")
- print_out(highlight_hashtags(markdown))
+ print_out(markdown)
def print_poll(poll: Poll):
diff --git a/toot/richtext/__init__.py b/toot/richtext/__init__.py
new file mode 100644
index 0000000..9888a5d
--- /dev/null
+++ b/toot/richtext/__init__.py
@@ -0,0 +1,25 @@
+from toot.tui.utils import highlight_hashtags
+from toot.utils import html_to_paragraphs
+from toot.wcstring import wc_wrap
+from typing import List
+
+try:
+ # first preference, render markup with pypandoc
+ from .markdown import html_to_text
+
+except ImportError:
+    # Fall back to rendering plain text
+    def html_to_text(html: str, columns=80, highlight_tags=False) -> List:
+        """Plain-text fallback: return the HTML as lines wrapped to `columns`.
+
+        Paragraphs are separated by one empty line; with `highlight_tags`
+        each wrapped line is passed through `highlight_hashtags`.
+        """
+        rendered = []
+        for index, paragraph in enumerate(html_to_paragraphs(html)):
+            if index:
+                rendered.append("")
+            for line in paragraph:
+                pieces = wc_wrap(line, columns)
+                rendered.extend(map(highlight_hashtags, pieces) if highlight_tags else pieces)
+        return rendered
diff --git a/toot/richtext/markdown.py b/toot/richtext/markdown.py
new file mode 100644
index 0000000..a3ea03c
--- /dev/null
+++ b/toot/richtext/markdown.py
@@ -0,0 +1,11 @@
+from pypandoc import convert_text
+from typing import List
+
+
+def html_to_text(html: str, columns=80, highlight_tags=False) -> List:
+    """Convert HTML to GFM text lines wrapped at `columns` (`highlight_tags` is unused)."""
+    # Pandoc ends its output with a newline; trim it so joined output has no
+    # stray blank line, and return one entry per line of text.
+    wrap_args = ["--wrap=auto", f"--columns={columns}"]
+    text = convert_text(html, format="html", to="gfm-raw_html", extra_args=wrap_args)
+    return text.rstrip().split("\n")
diff --git a/toot/tui/app.py b/toot/tui/app.py
index d90428d..838b7b3 100644
--- a/toot/tui/app.py
+++ b/toot/tui/app.py
@@ -1,13 +1,13 @@
import logging
import subprocess
import urwid
-import html2text
from concurrent.futures import ThreadPoolExecutor
from toot import api, config, __version__, settings
from toot.console import get_default_visibility
from toot.exceptions import ApiError
+from toot.richtext import html_to_text
from toot.utils.datetime import parse_datetime
from .compose import StatusComposer
@@ -656,12 +656,8 @@ class TUI(urwid.Frame):
return self.run_in_thread(_delete, done_callback=_done)
def copy_status(self, status):
- h2t = html2text.HTML2Text()
- h2t.body_width = 0 # nowrap
- h2t.single_line_break = True
- h2t.ignore_links = True
- h2t.unicode_snob = True
- h2t.ul_item_mark = "\N{bullet}"
+
+ markdown = "\n".join(html_to_text(status.original.data["content"], columns=1024, highlight_tags=False))
time = parse_datetime(status.original.data['created_at'])
time = time.strftime('%Y-%m-%d %H:%M %Z')
@@ -671,7 +667,7 @@ class TUI(urwid.Frame):
+ "\n"
+ (status.original.author.account or "")
+ "\n\n"
- + h2t.handle(status.original.data["content"]).strip()
+ + markdown
+ "\n\n"
+ f"Created at: {time}")
diff --git a/toot/tui/richtext/__init__.py b/toot/tui/richtext/__init__.py
index 2793493..e0e43dc 100644
--- a/toot/tui/richtext/__init__.py
+++ b/toot/tui/richtext/__init__.py
@@ -1,27 +1,24 @@
import urwid
-import html2text
-
+from toot.tui.utils import highlight_hashtags
+from toot.utils import format_content
from typing import List
try:
+ # our first preference is to render using urwidgets
from .richtext import html_to_widgets, url_to_widget
+
except ImportError:
- # Fallback if urwidgets are not available
- def html_to_widgets(html: str) -> List[urwid.Widget]:
- return [
- urwid.Text(_format_markdown(html))
- ]
+ try:
+ # second preference, render markup with pypandoc
+ from .markdown import html_to_widgets, url_to_widget
- def url_to_widget(url: str):
- return urwid.Text(("link", url))
+ except ImportError:
+        # Fall back to rendering plain text
- def _format_markdown(html) -> str:
- h2t = html2text.HTML2Text()
- h2t.single_line_break = True
- h2t.ignore_links = True
- h2t.wrap_links = False
- h2t.wrap_list_items = False
- h2t.wrap_tables = False
- h2t.unicode_snob = True
- h2t.ul_item_mark = "\N{bullet}"
- return h2t.handle(html).strip()
+        def url_to_widget(url: str):
+            """Return `url` as a Text widget using the "link" attribute."""
+            return urwid.Text(("link", url))
+
+        def html_to_widgets(html: str) -> List[urwid.Widget]:
+            """Plain-text fallback: one Text widget per item from `format_content`."""
+            return [urwid.Text(highlight_hashtags(row)) for row in format_content(html)]
diff --git a/toot/tui/richtext/markdown.py b/toot/tui/richtext/markdown.py
new file mode 100644
index 0000000..dcc5e7a
--- /dev/null
+++ b/toot/tui/richtext/markdown.py
@@ -0,0 +1,21 @@
+import urwid
+from pypandoc import convert_text
+
+from typing import List
+
+
+def url_to_widget(url: str):
+    """Return a Text widget showing `url` with the "link" display attribute."""
+    return urwid.Text(("link", url))
+
+
+def html_to_widgets(html: str) -> List[urwid.Widget]:
+    """Render HTML as one urwid Text widget holding pandoc's GFM output.
+
+    Pandoc wrapping is disabled (`--wrap=none`); trailing whitespace is trimmed.
+    """
+    markdown = convert_text(
+        html, format="html", to="gfm-raw_html", extra_args=["--wrap=none"]
+    )
+    # Pandoc's output ends with a newline, which would add an empty row.
+    return [urwid.Text(markdown.rstrip())]