Add wcstring utils

2024-06-23 06:25:26 +00:00 · 2019-02-14 15:23:43 +01:00 · 2019-02-14 15:23:43 +01:00 · 769ff9e406
commit 769ff9e406
parent 8805a50194
2 changed files with 149 additions and 0 deletions
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@ -1,4 +1,5 @@
 from toot import utils
+from toot.wcstring import wc_wrap


 def test_pad():
@ -73,3 +74,85 @@ def test_fit_text():
    assert utils.fit_text(text, 18) == 'Frank Zappa 🎸    '
    assert utils.fit_text(text, 19) == 'Frank Zappa 🎸     '
    assert utils.fit_text(text, 20) == 'Frank Zappa 🎸      '
+
+
+def test_wc_wrap_plain_text():
+    """Plain text is wrapped on spaces into lines of at most 50 columns."""
+    text = (
+        "Eius voluptas eos praesentium et tempore. Quaerat nihil voluptatem "
+        "excepturi reiciendis sapiente voluptate natus. Tenetur occaecati "
+        "velit dicta dolores. Illo reiciendis nulla ea. Facilis nostrum non "
+        "qui inventore sit."
+    )
+    expected = [
+        #01234567890123456789012345678901234567890123456789 # noqa
+        "Eius voluptas eos praesentium et tempore. Quaerat",
+        "nihil voluptatem excepturi reiciendis sapiente",
+        "voluptate natus. Tenetur occaecati velit dicta",
+        "dolores. Illo reiciendis nulla ea. Facilis nostrum",
+        "non qui inventore sit.",
+    ]
+
+    wrapped = list(wc_wrap(text, 50))
+    assert wrapped == expected
+
+
+def test_wc_wrap_plain_text_wrap_on_any_whitespace():
+    """Tabs, newlines, carriage returns and em spaces all separate words."""
+    text = (
+        "Eius\t\tvoluptas\teos\tpraesentium\tet\ttempore.\tQuaerat\tnihil\tvoluptatem\t"
+        "excepturi\nreiciendis\n\nsapiente\nvoluptate\nnatus.\nTenetur\noccaecati\n"
+        "velit\rdicta\rdolores.\rIllo\rreiciendis\rnulla\r\r\rea.\rFacilis\rnostrum\rnon\r"
+        "qui\u2003inventore\u2003\u2003sit."  # em space
+    )
+    expected = [
+        #01234567890123456789012345678901234567890123456789 # noqa
+        "Eius voluptas eos praesentium et tempore. Quaerat",
+        "nihil voluptatem excepturi reiciendis sapiente",
+        "voluptate natus. Tenetur occaecati velit dicta",
+        "dolores. Illo reiciendis nulla ea. Facilis nostrum",
+        "non qui inventore sit.",
+    ]
+
+    wrapped = list(wc_wrap(text, 50))
+    assert wrapped == expected
+
+
+def test_wc_wrap_text_with_wide_chars():
+    """Wide characters are expected to take two columns each when wrapping."""
+    text = (
+        "☕☕☕☕☕ voluptas eos praesentium et 🎸🎸🎸🎸🎸. Quaerat nihil "
+        "voluptatem excepturi reiciendis sapiente voluptate natus."
+    )
+    expected = [
+        #01234567890123456789012345678901234567890123456789 # noqa
+        "☕☕☕☕☕ voluptas eos praesentium et 🎸🎸🎸🎸🎸.",
+        "Quaerat nihil voluptatem excepturi reiciendis",
+        "sapiente voluptate natus.",
+    ]
+
+    wrapped = list(wc_wrap(text, 50))
+    assert wrapped == expected
+
+
+def test_wc_wrap_hard_wrap():
+    """Text without any whitespace is broken mid-word at the target width."""
+    text = (
+        "☕☕☕☕☕voluptaseospraesentiumet🎸🎸🎸🎸🎸.Quaeratnihil"
+        "voluptatemexcepturireiciendissapientevoluptatenatus."
+    )
+    expected = [
+        #01234567890123456789012345678901234567890123456789 # noqa
+        "☕☕☕☕☕voluptaseospraesentiumet🎸🎸🎸🎸🎸.Quaer",
+        "atnihilvoluptatemexcepturireiciendissapientevolupt",
+        "atenatus.",
+    ]
+
+    wrapped = list(wc_wrap(text, 50))
+    assert wrapped == expected
+
+
+def test_wc_wrap_indented():
+    """Leading indentation and runs of spaces inside the text are dropped."""
+    text = (
+        "     Eius voluptas eos praesentium et tempore. Quaerat nihil voluptatem "
+        "     excepturi reiciendis sapiente voluptate natus. Tenetur occaecati "
+        "     velit dicta dolores. Illo reiciendis nulla ea. Facilis nostrum non "
+        "     qui inventore sit."
+    )
+    expected = [
+        #01234567890123456789012345678901234567890123456789 # noqa
+        "Eius voluptas eos praesentium et tempore. Quaerat",
+        "nihil voluptatem excepturi reiciendis sapiente",
+        "voluptate natus. Tenetur occaecati velit dicta",
+        "dolores. Illo reiciendis nulla ea. Facilis nostrum",
+        "non qui inventore sit.",
+    ]
+
+    wrapped = list(wc_wrap(text, 50))
+    assert wrapped == expected
--- a/toot/wcstring.py
+++ b/toot/wcstring.py
@ -0,0 +1,66 @@
+"""
+Utilities for dealing with string containing wide characters.
+"""
+
+import re
+
+from wcwidth import wcwidth, wcswidth
+
+
+def _wc_hard_wrap(line, length):
+    """
+    Wrap text to length characters, breaking when target length is reached,
+    taking into account character width.
+
+    Used to wrap lines which cannot be wrapped on whitespace.
+
+    Yields the consecutive chunks of ``line``. A chunk is only emitted once
+    it holds at least one character, so a character wider than ``length``
+    ends up alone in an over-wide chunk instead of producing an empty chunk.
+    """
+    chars = []
+    chars_len = 0
+    for char in line:
+        # wcwidth() reports -1 for non-printable characters; count those as
+        # zero width so they never shrink the running total.
+        char_len = max(wcwidth(char), 0)
+
+        # Only break when the current chunk is non-empty, otherwise a single
+        # character wider than length would make us yield "".
+        if chars and chars_len + char_len > length:
+            yield "".join(chars)
+            chars = []
+            chars_len = 0
+
+        chars.append(char)
+        chars_len += char_len
+
+    if chars:
+        yield "".join(chars)
+
+
+def wc_wrap(text, length):
+    """
+    Wrap text to given length, breaking on whitespace and taking into account
+    character width.
+
+    Meant for use on a single line or paragraph. Will destroy spacing between
+    words and paragraphs and any indentation.
+
+    Yields the wrapped lines one at a time. A line which is wider than
+    ``length`` (a single word that does not fit) is split further with
+    _wc_hard_wrap().
+    """
+    def emit(words, width):
+        # Yield the joined words as one line, or hard-wrap it when too wide.
+        line = " ".join(words)
+        if width <= length:
+            yield line
+        else:
+            yield from _wc_hard_wrap(line, length)
+
+    line_words = []
+    line_len = 0  # display width of " ".join(line_words)
+
+    for word in re.split(r"\s+", text.strip()):
+        word_len = wcswidth(word)
+        if word_len < 0:
+            # wcswidth() returns -1 when the word holds a non-printable
+            # character; measure per character instead, counting the
+            # non-printable ones as zero width.
+            word_len = sum(max(wcwidth(char), 0) for char in word)
+
+        if line_words:
+            # The extra 1 accounts for the space separating the words.
+            if line_len + 1 + word_len > length:
+                yield from emit(line_words, line_len)
+                line_words = []
+                line_len = 0
+            else:
+                line_len += 1
+
+        line_words.append(word)
+        line_len += word_len
+
+    if line_words:
+        yield from emit(line_words, line_len)