0
0
mirror of https://github.com/vim/vim.git synced 2025-09-23 03:43:49 -04:00

patch 9.1.0101: upper-case of German sharp s should be U+1E9E

Problem:  upper-case of ß should be U+1E9E (CAPITAL LETTER SHARP S)
          (fenuks)
Solution: Make gU, ~ and g~ convert the U+00DF LATIN SMALL LETTER SHARP S (ß)
          to U+1E9E LATIN CAPITAL LETTER SHARP S (ẞ), update tests
          (glepnir)

U+1E9E has been part of Unicode since version 5.1.0 (April 2008), so it
should be fairly safe to use by now. Since 2017 it has also been part of
the standard German orthography, according to Wikipedia:
https://en.wikipedia.org/wiki/Capital_%E1%BA%9E#cite_note-auto-12

There is however one exception: UnicodeData.txt for U+00DF
LATIN SMALL LETTER SHARP S does NOT define U+1E9E LATIN CAPITAL LETTER
SHARP S as its upper case version. Therefore, toupper() won't be able
to convert from lower sharp s to upper case sharp s (the other way
around however works, since U+00DF is considered the lower case
character of U+1E9E and therefore tolower() works correctly for the
upper case version).

fixes: #5573
closes: #14018

Signed-off-by: glepnir <glephunter@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
This commit is contained in:
glepnir
2024-02-12 22:14:53 +01:00
committed by Christian Brabandt
parent f2d90a3511
commit bd1232a1fa
6 changed files with 56 additions and 16 deletions

View File

@@ -3454,6 +3454,8 @@ static convertStruct toLower[] =
{0x1e900,0x1e921,1,34} {0x1e900,0x1e921,1,34}
}; };
// Note: UnicodeData.txt does not define U+1E9E as being the corresponding upper
// case letter for U+00DF (ß), however it is part of the toLower table
static convertStruct toUpper[] = static convertStruct toUpper[] =
{ {
{0x61,0x7a,1,-32}, {0x61,0x7a,1,-32},

View File

@@ -1431,18 +1431,19 @@ swapchar(int op_type, pos_T *pos)
if (c >= 0x80 && op_type == OP_ROT13) if (c >= 0x80 && op_type == OP_ROT13)
return FALSE; return FALSE;
if (op_type == OP_UPPER && c == 0xdf // ~ is OP_NOP, g~ is OP_TILDE, gU is OP_UPPER
&& (enc_latin1like || STRCMP(p_enc, "iso-8859-2") == 0)) if ((op_type == OP_UPPER || op_type == OP_NOP || op_type == OP_TILDE)
&& c == 0xdf
&& (enc_latin1like || STRCMP(p_enc, "iso-8859-2") == 0))
{ {
pos_T sp = curwin->w_cursor; pos_T sp = curwin->w_cursor;
// Special handling of German sharp s: change to "SS". // Special handling for lowercase German sharp s (ß): convert to uppercase (ẞ).
curwin->w_cursor = *pos; curwin->w_cursor = *pos;
del_char(FALSE); del_char(FALSE);
ins_char('S'); ins_char(0x1E9E);
ins_char('S');
curwin->w_cursor = sp; curwin->w_cursor = sp;
inc(pos); return TRUE;
} }
if (enc_dbcs != 0 && c >= 0x100) // No lower/uppercase letter if (enc_dbcs != 0 && c >= 0x100) // No lower/uppercase letter

View File

@@ -2347,19 +2347,19 @@ func Test_normal30_changecase()
norm! 1ggVu norm! 1ggVu
call assert_equal('this is a simple test: äüöß', getline('.')) call assert_equal('this is a simple test: äüöß', getline('.'))
norm! VU norm! VU
call assert_equal('THIS IS A SIMPLE TEST: ÄÜÖSS', getline('.')) call assert_equal('THIS IS A SIMPLE TEST: ÄÜÖ', getline('.'))
norm! guu norm! guu
call assert_equal('this is a simple test: äüöss', getline('.')) call assert_equal('this is a simple test: äüöß', getline('.'))
norm! gUgU norm! gUgU
call assert_equal('THIS IS A SIMPLE TEST: ÄÜÖSS', getline('.')) call assert_equal('THIS IS A SIMPLE TEST: ÄÜÖ', getline('.'))
norm! gugu norm! gugu
call assert_equal('this is a simple test: äüöss', getline('.')) call assert_equal('this is a simple test: äüöß', getline('.'))
norm! gUU norm! gUU
call assert_equal('THIS IS A SIMPLE TEST: ÄÜÖSS', getline('.')) call assert_equal('THIS IS A SIMPLE TEST: ÄÜÖ', getline('.'))
norm! 010~ norm! 010~
call assert_equal('this is a SIMPLE TEST: ÄÜÖSS', getline('.')) call assert_equal('this is a SIMPLE TEST: ÄÜÖ', getline('.'))
norm! V~ norm! V~
call assert_equal('THIS IS A simple test: äüöss', getline('.')) call assert_equal('THIS IS A simple test: äüöß', getline('.'))
call assert_beeps('norm! c~') call assert_beeps('norm! c~')
%d %d
call assert_beeps('norm! ~') call assert_beeps('norm! ~')

View File

@@ -93,4 +93,39 @@ func Test_gap()
call assert_equal(["ABCD", "", "defg"], getline(1,3)) call assert_equal(["ABCD", "", "defg"], getline(1,3))
endfunc endfunc
" Check that the case operators ~, g~ and gU turn ß (U+00DF) into ẞ (U+1E9E),
" and that ~, g~ and gu turn ẞ back into ß.
func Test_uppercase_sharp_ss()
  new
  " Four single-character lines, one per scenario below.
  call setline(1, ['ß', 'ß', 'ß', 'ß'])

  " Line 1: ~ switches the case, a second ~ switches it back.
  call cursor(1, 1)
  normal! ~
  call assert_equal('ẞ', getline('.'))
  normal! ~
  call assert_equal('ß', getline('.'))

  " Line 2: same round trip using the g~ operator with a motion.
  call cursor(2, 1)
  normal! g~l
  call assert_equal('ẞ', getline('.'))
  normal! g~l
  call assert_equal('ß', getline('.'))

  " Line 3: gU upper-cases; repeating it in Visual mode keeps ẞ.
  " gu lower-cases; repeating it with a motion keeps ß.
  call cursor(3, 1)
  normal! gUl
  call assert_equal('ẞ', getline('.'))
  normal! vgU
  call assert_equal('ẞ', getline('.'))
  normal! vgu
  call assert_equal('ß', getline('.'))
  normal! gul
  call assert_equal('ß', getline('.'))

  " Line 4: Visual-mode gU followed by gu on a fresh ß.
  call cursor(4, 1)
  normal! vgU
  call assert_equal('ẞ', getline('.'))
  normal! vgu
  call assert_equal('ß', getline('.'))

  bwipe!
endfunc
" vim: shiftwidth=2 sts=2 expandtab " vim: shiftwidth=2 sts=2 expandtab

View File

@@ -1020,9 +1020,9 @@ func Test_visual_change_case()
exe "normal Oblah di\rdoh dut\<Esc>VkUj\r" exe "normal Oblah di\rdoh dut\<Esc>VkUj\r"
" Uppercase part of two lines " Uppercase part of two lines
exe "normal ddppi333\<Esc>k0i222\<Esc>fyllvjfuUk" exe "normal ddppi333\<Esc>k0i222\<Esc>fyllvjfuUk"
call assert_equal(['the YOUTUSSEUU end', '- yOUSSTUSSEXu -', call assert_equal(['the YOUTUEUU end', '- yOUẞTUẞEXu -',
\ 'THE YOUTUSSEUU END', '111THE YOUTUSSEUU END', 'BLAH DI', 'DOH DUT', \ 'THE YOUTUEUU END', '111THE YOUTUEUU END', 'BLAH DI', 'DOH DUT',
\ '222the yoUTUSSEUU END', '333THE YOUTUßeuu end'], getline(2, '$')) \ '222the yoUTUEUU END', '333THE YOUTUßeuu end'], getline(2, '$'))
bwipe! bwipe!
endfunc endfunc

View File

@@ -704,6 +704,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
101,
/**/ /**/
100, 100,
/**/ /**/