diff --git a/setup.py b/setup.py index 5946faf..7ed6c15 100644 --- a/setup.py +++ b/setup.py @@ -39,12 +39,15 @@ setup( "wcwidth>=0.1.7", "urwid>=2.0.0,<3.0", "tomlkit>=0.10.0,<1.0", - "html2text>=2020.1.16" ], extras_require={ # Required to display rich text in the TUI "richtext": [ - "urwidgets>=0.1,<0.2" + "urwidgets>=0.1,<0.2", + ], + "markdown": [ + "pypandoc>=1.12.0,<2.0", + "pypandoc-binary>=1.12.0,<2.0", ], "dev": [ "coverage", diff --git a/tests/test_console.py b/tests/test_console.py index 5eeb171..1d321df 100644 --- a/tests/test_console.py +++ b/tests/test_console.py @@ -152,210 +152,6 @@ def test_timeline(mock_get, monkeypatch, capsys): assert err == "" -@mock.patch('toot.http.get') -def test_timeline_html_content(mock_get, monkeypatch, capsys): - mock_get.return_value = MockResponse([{ - 'id': '111111111111111111', - 'account': { - 'display_name': 'Frank Zappa 🎸', - 'acct': 'fz' - }, - 'created_at': '2017-04-12T15:53:18.174Z', - 'content': "

HTML Render Test

emphasized
underlined
bold
bold and italic
strikethrough
regular text

Code block:

10 PRINT \"HELLO WORLD\"
20 GOTO 10

Something blockquoted here. The indentation is maintained as the text line wraps.

  1. List item
  2. Another list item.
    1. Something else nested
    2. And a last nested

Blockquote

  1. List in BQ
  2. List item 2 in BQ

#hashtag #test
https://a.com text after link

", - 'reblog': None, - 'in_reply_to_id': None, - 'media_attachments': [], - }]) - - console.run_command(app, user, 'timeline', ['--once']) - - mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) - - out, err = capsys.readouterr() - lines = out.split("\n") - reference = [ - "────────────────────────────────────────────────────────────────────────────────────────────────────", - "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC", - "", - "## HTML Render Test", - "", - " _emphasized_ ", - " _underlined_ ", - " **bold** ", - " ** _bold and italic_** ", - " ~~strikethrough~~ ", - "regular text", - "", - "Code block:", - "", - " ", - " 10 PRINT \"HELLO WORLD\" ", - " 20 GOTO 10 ", - " ", - "> Something blockquoted here. The indentation is maintained as the text line wraps.", - " 1. List item", - " • Nested item", - " • Another nested ", - " 2. Another list item. ", - " 1. Something else nested", - " 2. And a last nested", - "", - "> Blockquote", - "> 1. List in BQ", - "> 2. List item 2 in BQ", - ">", - "", - "#hashtag #test ", - "https://a.com text after link", - "", - "ID 111111111111111111 ", - "────────────────────────────────────────────────────────────────────────────────────────────────────", - "", - ] - - assert len(lines) == len(reference) - for index, line in enumerate(lines): - assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}" - - assert err == "" - - -@mock.patch('toot.http.get') -def test_timeline_html_content(mock_get, monkeypatch, capsys): - mock_get.return_value = MockResponse([{ - 'id': '111111111111111111', - 'account': { - 'display_name': 'Frank Zappa 🎸', - 'acct': 'fz' - }, - 'created_at': '2017-04-12T15:53:18.174Z', - 'content': "

HTML Render Test

emphasized
underlined
bold
bold and italic
strikethrough
regular text

Code block:

10 PRINT \"HELLO WORLD\"
20 GOTO 10

Something blockquoted here. The indentation is maintained as the text line wraps.

  1. List item
  2. Another list item.
    1. Something else nested
    2. And a last nested

Blockquote

  1. List in BQ
  2. List item 2 in BQ

#hashtag #test
https://a.com text after link

", - 'reblog': None, - 'in_reply_to_id': None, - 'media_attachments': [], - }]) - - console.run_command(app, user, 'timeline', ['--once']) - - mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) - - out, err = capsys.readouterr() - lines = out.split("\n") - reference = [ - "────────────────────────────────────────────────────────────────────────────────────────────────────", - "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC", - "", - "## HTML Render Test", - "", - " _emphasized_ ", - " _underlined_ ", - " **bold** ", - " ** _bold and italic_** ", - " ~~strikethrough~~ ", - "regular text", - "", - "Code block:", - "", - " ", - " 10 PRINT \"HELLO WORLD\" ", - " 20 GOTO 10 ", - " ", - "> Something blockquoted here. The indentation is maintained as the text line wraps.", - " 1. List item", - " • Nested item", - " • Another nested ", - " 2. Another list item. ", - " 1. Something else nested", - " 2. And a last nested", - "", - "> Blockquote", - "> 1. List in BQ", - "> 2. List item 2 in BQ", - ">", - "", - "#hashtag #test ", - "https://a.com text after link", - "", - "ID 111111111111111111 ", - "────────────────────────────────────────────────────────────────────────────────────────────────────", - "", - ] - - assert len(lines) == len(reference) - for index, line in enumerate(lines): - assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}" - - assert err == "" - - -@mock.patch('toot.http.get') -def test_timeline_html_content(mock_get, monkeypatch, capsys): - mock_get.return_value = MockResponse([{ - 'id': '111111111111111111', - 'account': { - 'display_name': 'Frank Zappa 🎸', - 'acct': 'fz' - }, - 'created_at': '2017-04-12T15:53:18.174Z', - 'content': "

HTML Render Test

emphasized
underlined
bold
bold and italic
strikethrough
regular text

Code block:

10 PRINT \"HELLO WORLD\"
20 GOTO 10

Something blockquoted here. The indentation is maintained as the text line wraps.

  1. List item
  2. Another list item.
    1. Something else nested
    2. And a last nested

Blockquote

  1. List in BQ
  2. List item 2 in BQ

#hashtag #test
https://a.com text after link

", - 'reblog': None, - 'in_reply_to_id': None, - 'media_attachments': [], - }]) - - console.run_command(app, user, 'timeline', ['--once']) - - mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) - - out, err = capsys.readouterr() - lines = out.split("\n") - reference = [ - "────────────────────────────────────────────────────────────────────────────────────────────────────", - "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC", - "", - "## HTML Render Test", - "", - " _emphasized_ ", - " _underlined_ ", - " **bold** ", - " ** _bold and italic_** ", - " ~~strikethrough~~ ", - "regular text", - "", - "Code block:", - "", - " ", - " 10 PRINT \"HELLO WORLD\" ", - " 20 GOTO 10 ", - " ", - "> Something blockquoted here. The indentation is maintained as the text line wraps.", - " 1. List item", - " • Nested item", - " • Another nested ", - " 2. Another list item. ", - " 1. Something else nested", - " 2. And a last nested", - "", - "> Blockquote", - "> 1. List in BQ", - "> 2. List item 2 in BQ", - ">", - "", - "#hashtag #test ", - "https://a.com text after link", - "", - "ID 111111111111111111 ", - "────────────────────────────────────────────────────────────────────────────────────────────────────", - "", - ] - - assert len(lines) == len(reference) - for index, line in enumerate(lines): - assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}" - - assert err == "" - - @mock.patch('toot.http.get') def test_timeline_with_re(mock_get, monkeypatch, capsys): mock_get.return_value = MockResponse([{ diff --git a/toot/output.py b/toot/output.py index 7bdc333..9bf7d91 100644 --- a/toot/output.py +++ b/toot/output.py @@ -2,11 +2,11 @@ import os import re import sys import textwrap -import html2text from functools import lru_cache from toot import settings from toot.utils import get_text +from toot.richtext import html_to_text from toot.entities import Account, Instance, Notification, Poll, Status from toot.wcstring import wc_wrap from typing import List @@ -321,20 +321,9 @@ def print_status(status: Status, width: int = 80): def print_html(text, width=80): - h2t = html2text.HTML2Text() - - h2t.body_width = width - h2t.single_line_break = True - h2t.ignore_links = True - h2t.wrap_links = True - h2t.wrap_list_items = True - h2t.wrap_tables = True - h2t.unicode_snob = True - h2t.ul_item_mark = "\N{bullet}" - markdown = h2t.handle(text).strip() - + markdown = "\n".join(html_to_text(text, columns=width, highlight_tags=False)) print_out("") - print_out(highlight_hashtags(markdown)) + print_out(markdown) def print_poll(poll: Poll): diff --git a/toot/richtext/__init__.py b/toot/richtext/__init__.py new file mode 100644 index 0000000..9888a5d --- /dev/null +++ b/toot/richtext/__init__.py @@ -0,0 +1,25 @@ +from toot.tui.utils import highlight_hashtags +from toot.utils import html_to_paragraphs +from toot.wcstring import wc_wrap +from typing import List + +try: + # first preference, render markup with pypandoc + from .markdown import html_to_text + +except ImportError: + # Fallback to render in plaintext + def html_to_text(html: str, columns=80, highlight_tags=False) -> List: + output = [] + first = True + for paragraph in html_to_paragraphs(html): + if not first: + output.append("") + for line in paragraph: + for subline in wc_wrap(line, columns): + if highlight_tags: + output.append(highlight_hashtags(subline)) + else: + output.append(subline) + first = False + return output diff --git a/toot/richtext/markdown.py b/toot/richtext/markdown.py new file mode 100644 index 0000000..a3ea03c --- /dev/null +++ b/toot/richtext/markdown.py @@ -0,0 +1,11 @@ +from pypandoc import convert_text +from typing import List + + +def html_to_text(html: str, columns=80, highlight_tags=False) -> List: + return [convert_text( + html, + format="html", + to="gfm-raw_html", + extra_args=["--wrap=auto", f"--columns={columns}"], + )] diff --git a/toot/tui/app.py b/toot/tui/app.py index d90428d..838b7b3 100644 --- a/toot/tui/app.py +++ b/toot/tui/app.py @@ -1,13 +1,13 @@ import logging import subprocess import urwid -import html2text from concurrent.futures import ThreadPoolExecutor from toot import api, config, __version__, settings from toot.console import get_default_visibility from toot.exceptions import ApiError +from toot.richtext import html_to_text from toot.utils.datetime import parse_datetime from .compose import StatusComposer @@ -656,12 +656,8 @@ class TUI(urwid.Frame): return self.run_in_thread(_delete, done_callback=_done) def copy_status(self, status): - h2t = html2text.HTML2Text() - h2t.body_width = 0 # nowrap - h2t.single_line_break = True - h2t.ignore_links = True - h2t.unicode_snob = True - h2t.ul_item_mark = "\N{bullet}" + + markdown = "\n".join(html_to_text(status.original.data["content"], columns=1024, highlight_tags=False)) time = parse_datetime(status.original.data['created_at']) time = time.strftime('%Y-%m-%d %H:%M %Z') @@ -671,7 +667,7 @@ class TUI(urwid.Frame): + "\n" + (status.original.author.account or "") + "\n\n" - + h2t.handle(status.original.data["content"]).strip() + + markdown + "\n\n" + f"Created at: {time}") diff --git a/toot/tui/richtext/__init__.py b/toot/tui/richtext/__init__.py index 2793493..e0e43dc 100644 --- a/toot/tui/richtext/__init__.py +++ b/toot/tui/richtext/__init__.py @@ -1,27 +1,24 @@ import urwid -import html2text - +from toot.tui.utils import highlight_hashtags +from toot.utils import format_content from typing import List try: + # our first preference is to render using urwidgets from .richtext import html_to_widgets, url_to_widget + except ImportError: - # Fallback if urwidgets are not available - def html_to_widgets(html: str) -> List[urwid.Widget]: - return [ - urwid.Text(_format_markdown(html)) - ] + try: + # second preference, render markup with pypandoc + from .markdown import html_to_widgets, url_to_widget - def url_to_widget(url: str): - return urwid.Text(("link", url)) + except ImportError: + # Fallback to render in plaintext - def _format_markdown(html) -> str: - h2t = html2text.HTML2Text() - h2t.single_line_break = True - h2t.ignore_links = True - h2t.wrap_links = False - h2t.wrap_list_items = False - h2t.wrap_tables = False - h2t.unicode_snob = True - h2t.ul_item_mark = "\N{bullet}" - return h2t.handle(html).strip() + def url_to_widget(url: str): + return urwid.Text(("link", url)) + + def html_to_widgets(html: str) -> List[urwid.Widget]: + return [ + urwid.Text(highlight_hashtags(line)) for line in format_content(html) + ] diff --git a/toot/tui/richtext/markdown.py b/toot/tui/richtext/markdown.py new file mode 100644 index 0000000..dcc5e7a --- /dev/null +++ b/toot/tui/richtext/markdown.py @@ -0,0 +1,21 @@ +import urwid +from pypandoc import convert_text + +from typing import List + + +def url_to_widget(url: str): + return urwid.Text(("link", url)) + + +def html_to_widgets(html: str) -> List[urwid.Widget]: + return [ + urwid.Text( + convert_text( + html, + format="html", + to="gfm-raw_html", + extra_args=["--wrap=none"], + ) + ) + ]