mirror of
https://github.com/ihabunek/toot.git
synced 2024-09-29 04:35:54 -04:00
Support for rendering a subset of HTML tags in status content
Code is adapted from GPL3-licensed muv by @seonon https://github.com/seonon/muv
This commit is contained in:
parent
6ce728e020
commit
6b2c3f09bf
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,6 +1,7 @@
|
|||||||
*.egg-info/
|
*.egg-info/
|
||||||
*.pyc
|
*.pyc
|
||||||
.pypirc
|
.pypirc
|
||||||
|
.vscode
|
||||||
/.cache/
|
/.cache/
|
||||||
/.coverage
|
/.coverage
|
||||||
/.env
|
/.env
|
||||||
@ -14,3 +15,4 @@
|
|||||||
debug.log
|
debug.log
|
||||||
/pyrightconfig.json
|
/pyrightconfig.json
|
||||||
/book
|
/book
|
||||||
|
/venv
|
@ -37,7 +37,38 @@ PALETTE = [
|
|||||||
('yellow_bold', 'yellow,bold', ''),
|
('yellow_bold', 'yellow,bold', ''),
|
||||||
('red', 'dark red', ''),
|
('red', 'dark red', ''),
|
||||||
('warning', 'light red', ''),
|
('warning', 'light red', ''),
|
||||||
('white_bold', 'white,bold', '')
|
('white_bold', 'white,bold', ''),
|
||||||
|
|
||||||
|
# HTML tag styling
|
||||||
|
|
||||||
|
# note, anchor styling is often overridden
|
||||||
|
# by class names in Mastodon statuses
|
||||||
|
# so you won't see the italics.
|
||||||
|
('a', ',italics', ''),
|
||||||
|
('em', 'white,italics', ''),
|
||||||
|
('i', 'white,italics', ''),
|
||||||
|
|
||||||
|
('strong', 'white,bold', ''),
|
||||||
|
('b', 'white,bold', ''),
|
||||||
|
|
||||||
|
('u', 'white,underline', ''),
|
||||||
|
|
||||||
|
('del', 'white, strikethrough', ''),
|
||||||
|
|
||||||
|
('code', 'white, standout', ''),
|
||||||
|
('pre', 'white, standout', ''),
|
||||||
|
|
||||||
|
('blockquote', 'light gray', ''),
|
||||||
|
|
||||||
|
('h1', 'yellow, bold', ''),
|
||||||
|
('h2', 'dark red, bold', ''),
|
||||||
|
('h3', 'yellow, bold', ''),
|
||||||
|
('h4', 'yellow, bold', ''),
|
||||||
|
('h5', 'yellow, bold', ''),
|
||||||
|
('h6', 'yellow, bold', ''),
|
||||||
|
|
||||||
|
('class_mention_hashtag', 'light cyan,bold', ''),
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
VISIBILITY_OPTIONS = [
|
VISIBILITY_OPTIONS = [
|
||||||
|
267
toot/tui/richtext.py
Normal file
267
toot/tui/richtext.py
Normal file
@ -0,0 +1,267 @@
|
|||||||
|
"""
|
||||||
|
richtext
|
||||||
|
"""
|
||||||
|
from typing import List
|
||||||
|
import urwid
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from bs4.element import NavigableString, Tag
|
||||||
|
|
||||||
|
|
||||||
|
class ContentParser:
    """Parse a limited subset of HTML and create urwid widgets.

    Rendering is driven by tag name.  A tag is handled by, in order of
    precedence: an entry in ``self.tag_to_method``, a method on this class
    called ``_<tagname>``, or ``inline_tag_to_text`` as the fallback.

    Inline handlers return urwid text markup (a ``(style, markups)`` tuple,
    or ``""`` when the tag has no children); block handlers return a
    ``urwid.Widget``.
    """

    def __init__(self, config=None):
        """Build the explicit tag-name -> handler table.

        ``config`` is accepted for interface compatibility; it is not read.
        Its default is ``None`` rather than ``{}`` so that no mutable object
        is shared between calls.
        """
        self.tag_to_method = {
            "b": self.inline_tag_to_text,
            "i": self.inline_tag_to_text,
            "code": self.inline_tag_to_text,
            "em": self.inline_tag_to_text,
            "strong": self.inline_tag_to_text,
            "del": self.inline_tag_to_text,
        }

    def handler_for(self, name):
        """Return the callable used to render tags called ``name``.

        Looks in ``self.tag_to_method`` first, then for a ``_<name>`` method,
        and finally falls back to ``inline_tag_to_text``.
        """
        # Deliberately not underscore-prefixed: handlers are discovered with
        # getattr(self, "_" + tag_name), so a "_xyz" helper would be picked
        # up as the handler for a tag called <xyz>.
        return self.tag_to_method.get(
            name, getattr(self, "_" + name, self.inline_tag_to_text)
        )

    def html_to_widgets(self, html) -> List[urwid.Widget]:
        """Convert html to urwid widgets.

        Every top-level tag of ``html`` yields one widget.  When a handler
        returns text markup instead of a widget, the markup is wrapped in a
        left-aligned, full-width padded ``urwid.Text``.
        """
        widgets: List[urwid.Widget] = []
        # Replace the &apos; entity with a literal apostrophe before parsing.
        soup = BeautifulSoup(html.replace("&apos;", "'"), "html.parser")
        for e in soup.body or soup:
            if isinstance(e, NavigableString):
                # NOTE: text that sits at the top level, outside any tag,
                # is skipped and therefore not rendered.
                continue

            markup = self.handler_for(e.name)(e)  # a Widget, or text markup
            if not isinstance(markup, urwid.Widget):
                # text markup, so create a padded text widget
                markup = urwid.Padding(
                    urwid.Text(markup),
                    align="left",
                    width=("relative", 100),
                    min_width=None,
                )
            widgets.append(markup)
        return widgets

    def inline_tag_to_text(self, tag) -> "tuple | str":
        """Convert an html tag to text markup, recursively.

        Returns ``(tag.name, markups)`` so the tag name acts as the urwid
        style for its content, or ``""`` when the tag has no children.
        """
        markups = self.process_inline_tag_children(tag)
        if not markups:
            return ""
        return (tag.name, markups)

    def process_inline_tag_children(self, tag) -> list:
        """Return the markup for each child of ``tag``, in document order.

        Nested tags are rendered through their handler; text nodes are
        passed through unchanged.
        """
        return [
            self.handler_for(child.name)(child) if isinstance(child, Tag) else child
            for child in tag.children
        ]

    def process_block_tag_children(self, tag) -> List[urwid.Widget]:
        """Render the children of a block tag into a list of widgets.

        Text and inline markup found before the first nested widget is
        collected into one ``urwid.Text`` styled with ``tag.name``; the
        nested widgets follow; text and inline markup found after the first
        nested widget is collected into a second ``urwid.Text``.
        """
        pre_widget_markups = []
        post_widget_markups = []
        child_widgets = []
        found_nested_widget = False

        for child in tag.children:
            if isinstance(child, Tag):
                # child is a nested tag; process using custom method
                # or default to inline_tag_to_text
                result = self.handler_for(child.name)(child)
                if isinstance(result, urwid.Widget):
                    found_nested_widget = True
                    child_widgets.append(result)
                    continue
            else:
                # child is text
                result = child

            # NOTE: markup seen between two nested widgets lands in the
            # "post" list, so it is displayed after *all* nested widgets.
            if found_nested_widget:
                post_widget_markups.append(result)
            else:
                pre_widget_markups.append(result)

        widget_list = []
        if pre_widget_markups:
            widget_list.append(urwid.Text((tag.name, pre_widget_markups)))
        widget_list += child_widgets
        if post_widget_markups:
            widget_list.append(urwid.Text((tag.name, post_widget_markups)))

        return widget_list

    def get_style_name(self, tag) -> str:
        """Return the palette style name to use for ``tag``.

        This is ``"class_"`` followed by the tag's classes joined with
        ``"_"`` when it has a non-empty ``class`` attribute, otherwise the
        tag name.
        """
        # TODO: think about whitelisting allowed classes,
        # or blacklisting classes we do not want.
        # Classes to whitelist: "mention" "hashtag"
        # used in anchor tags
        # Classes to blacklist: "invisible" used in Akkoma
        # anchor titles
        clss = tag.attrs.get("class")
        if clss:
            return "class_" + "_".join(clss)
        return tag.name

    # Tag handlers start here.
    # Tags not explicitly listed are "supported" by
    # rendering as text.
    # Inline tags return text markup for urwid.Text
    # Block tags return urwid.Widget

    def basic_block_tag_handler(self, tag) -> urwid.Widget:
        """default for block tags that need no special treatment"""
        return urwid.Pile(self.process_block_tag_children(tag))

    def _a(self, tag) -> "tuple | str":
        """Render an anchor as markup styled by its class, if it has one."""
        markups = self.process_inline_tag_children(tag)
        if not markups:
            return ""

        # hashtag anchors have a class of "mention hashtag"
        # we'll return style "class_mention_hashtag"
        # in that case; set this up in constants.py
        # to control highlighting of hashtags

        return (self.get_style_name(tag), markups)

    def _blockquote(self, tag) -> urwid.Widget:
        """Render a blockquote, padded and with a line down its left side."""
        widget_list = self.process_block_tag_children(tag)
        # A LineBox with every border piece blanked except the left line.
        blockquote_widget = urwid.LineBox(
            urwid.Padding(
                urwid.Pile(widget_list),
                align="left",
                width=("relative", 100),
                min_width=None,
                left=1,
                right=1,
            ),
            tlcorner="",
            tline="",
            lline="│",
            trcorner="",
            blcorner="",
            rline="",
            bline="",
            brcorner="",
        )
        return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")])

    def _br(self, tag) -> tuple:
        """Render a line break as a newline in text markup."""
        return (tag.name, ("br", "\n"))

    _div = basic_block_tag_handler

    _li = basic_block_tag_handler

    # Glitch-soc and Pleroma allow <H1>...<H6> in content
    # Mastodon (PR #23913) does not; header tags are converted to <STRONG>

    _h1 = basic_block_tag_handler

    _h2 = basic_block_tag_handler

    _h3 = basic_block_tag_handler

    _h4 = basic_block_tag_handler

    _h5 = basic_block_tag_handler

    _h6 = basic_block_tag_handler

    def _ol(self, tag) -> urwid.Widget:
        """Render an ordered (numbered) list."""
        return self.list_widget(tag, ordered=True)

    _p = basic_block_tag_handler

    def _pre(self, tag) -> urwid.Widget:
        """Render preformatted text, padded and preceded by a blank line."""

        # <PRE> tag spec says that text should not wrap,
        # but horizontal screen space is at a premium
        # and we have no horizontal scroll bar, so allow
        # wrapping.

        widget_list = [urwid.Divider(" ")]
        widget_list += self.process_block_tag_children(tag)

        pre_widget = urwid.Padding(
            urwid.Pile(widget_list),
            align="left",
            width=("relative", 100),
            min_width=None,
            left=1,
            right=1,
        )
        return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])

    def _span(self, tag) -> "tuple | str":
        """Render a span as markup, styled by its own class or its parent."""
        markups = self.process_inline_tag_children(tag)

        if not markups:
            return ""

        # span inherits its parent's class definition
        # unless it has a specific class definition
        # of its own

        if "class" in tag.attrs:
            style_name = self.get_style_name(tag)
        elif tag.parent:
            style_name = self.get_style_name(tag.parent)
        else:
            style_name = tag.name

        return (style_name, markups)

    def _ul(self, tag) -> urwid.Widget:
        """Render an unordered (bulleted) list."""
        return self.list_widget(tag, ordered=False)

    def list_widget(self, tag, ordered=False) -> urwid.Widget:
        """Render the direct ``<li>`` children of ``tag`` as a pile of rows.

        Items are prefixed with ``1.``, ``2.``, ... when ``ordered`` is true
        and with ``*`` otherwise.  An item rendered as a widget is placed in
        a column to the right of its prefix; an item rendered as text markup
        shares a single text widget with its prefix.
        """
        render_item = self.handler_for("li")
        widgets = []
        for i, li in enumerate(tag.find_all("li", recursive=False), start=1):
            markup = render_item(li)

            if not isinstance(markup, urwid.Widget):
                if ordered:
                    txt = urwid.Text(
                        ("li", [str(i), ". ", markup])
                    )  # 1. foo, 2. bar, etc.
                else:
                    txt = urwid.Text(("li", ["* ", markup]))  # * foo, * bar, etc.
                widgets.append(txt)
            else:
                if ordered:
                    txt = urwid.Text(("li", [str(i) + "."]))
                else:
                    txt = urwid.Text(("li", "*"))

                columns = urwid.Columns(
                    [txt, ("weight", 9999, markup)], dividechars=1, min_width=4
                )
                widgets.append(columns)

        return urwid.Pile(widgets)
|
@ -7,11 +7,11 @@ from typing import List, Optional
|
|||||||
|
|
||||||
from .entities import Status
|
from .entities import Status
|
||||||
from .scroll import Scrollable, ScrollBar
|
from .scroll import Scrollable, ScrollBar
|
||||||
from .utils import highlight_hashtags, parse_datetime, highlight_keys
|
from .utils import parse_datetime, highlight_keys
|
||||||
from .widgets import SelectableText, SelectableColumns
|
from .widgets import SelectableText, SelectableColumns
|
||||||
|
from .richtext import ContentParser
|
||||||
from toot.tui import app
|
from toot.tui import app
|
||||||
from toot.tui.utils import time_ago
|
from toot.tui.utils import time_ago
|
||||||
from toot.utils import format_content
|
|
||||||
from toot.utils.language import language_name
|
from toot.utils.language import language_name
|
||||||
|
|
||||||
logger = logging.getLogger("toot")
|
logger = logging.getLogger("toot")
|
||||||
@ -341,8 +341,12 @@ class StatusDetails(urwid.Pile):
|
|||||||
yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view.")))
|
yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view.")))
|
||||||
else:
|
else:
|
||||||
content = status.original.translation if status.original.show_translation else status.data["content"]
|
content = status.original.translation if status.original.show_translation else status.data["content"]
|
||||||
for line in format_content(content):
|
|
||||||
yield ("pack", urwid.Text(highlight_hashtags(line, self.followed_tags)))
|
parser = ContentParser()
|
||||||
|
widgetlist = parser.html_to_widgets(content)
|
||||||
|
|
||||||
|
for line in widgetlist:
|
||||||
|
yield (line)
|
||||||
|
|
||||||
media = status.data["media_attachments"]
|
media = status.data["media_attachments"]
|
||||||
if media:
|
if media:
|
||||||
|
Loading…
Reference in New Issue
Block a user