0
0
mirror of https://github.com/vim/vim.git synced 2025-09-23 03:43:49 -04:00

patch 8.0.0243: tolower() does not work if the byte count changes

Problem:    When making a character lower case with tolower() changes the byte
            count, the character is not made lower case.
Solution:   Add strlow_save(). (Dominique Pelle, closes #1406)
This commit is contained in:
Bram Moolenaar
2017-01-26 22:51:56 +01:00
parent 65c836e600
commit cc5b22b3bf
5 changed files with 216 additions and 33 deletions

View File

@@ -12503,39 +12503,8 @@ f_timer_stopall(typval_T *argvars UNUSED, typval_T *rettv UNUSED)
static void static void
f_tolower(typval_T *argvars, typval_T *rettv) f_tolower(typval_T *argvars, typval_T *rettv)
{ {
char_u *p;
p = vim_strsave(get_tv_string(&argvars[0]));
rettv->v_type = VAR_STRING; rettv->v_type = VAR_STRING;
rettv->vval.v_string = p; rettv->vval.v_string = strlow_save(get_tv_string(&argvars[0]));
if (p != NULL)
while (*p != NUL)
{
#ifdef FEAT_MBYTE
int l;
if (enc_utf8)
{
int c, lc;
c = utf_ptr2char(p);
lc = utf_tolower(c);
l = utf_ptr2len(p);
/* TODO: reallocate string when byte count changes. */
if (utf_char2len(lc) == l)
utf_char2bytes(lc, p);
p += l;
}
else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
p += l; /* skip multi-byte character */
else
#endif
{
*p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */
++p;
}
}
} }
/* /*

View File

@@ -1602,7 +1602,10 @@ strup_save(char_u *orig)
{ {
s = alloc((unsigned)STRLEN(res) + 1 + newl - l); s = alloc((unsigned)STRLEN(res) + 1 + newl - l);
if (s == NULL) if (s == NULL)
break; {
vim_free(res);
return NULL;
}
mch_memmove(s, res, p - res); mch_memmove(s, res, p - res);
STRCPY(s + (p - res) + newl, p + l); STRCPY(s + (p - res) + newl, p + l);
p = s + (p - res); p = s + (p - res);
@@ -1625,6 +1628,69 @@ strup_save(char_u *orig)
return res; return res;
} }
/*
 * Make a lower-case copy of string "orig" and return it in allocated memory.
 * "orig" itself is not modified; all changes are made to the copy.
 *
 * When 'encoding' is UTF-8 each character is lower-cased with utf_tolower(),
 * and the copy is reallocated whenever the lower-case form needs a different
 * number of bytes than the original character.
 * For other multi-byte encodings multi-byte characters are skipped unchanged.
 * Single-byte characters are converted with TOLOWER_LOC().
 *
 * Returns NULL when out of memory; a partially converted copy is never
 * returned.
 */
char_u *
strlow_save(char_u *orig)
{
    char_u  *p;
    char_u  *res;

    res = p = vim_strsave(orig);

    if (res != NULL)
        while (*p != NUL)
        {
# ifdef FEAT_MBYTE
            int     l;

            if (enc_utf8)
            {
                int     c, lc;
                int     newl;
                char_u  *s;

                c = utf_ptr2char(p);
                l = utf_ptr2len(p);
                if (c == 0)
                {
                    /* "*p" is not NUL here, so a decoded value of zero means
                     * an overlong sequence.  Use only the first byte,
                     * otherwise a NUL would be written into the middle of the
                     * result and truncate it. */
                    c = *p;
                    l = 1;
                }
                lc = utf_tolower(c);

                /* Reallocate string when byte count changes.  This is rare,
                 * thus it's OK to do another malloc()/free(). */
                newl = utf_char2len(lc);
                if (newl != l)
                {
                    /* Old length + NUL + the difference in character size. */
                    s = alloc((unsigned)STRLEN(res) + 1 + newl - l);
                    if (s == NULL)
                    {
                        vim_free(res);
                        return NULL;
                    }
                    /* Copy the part already converted, leave a gap of "newl"
                     * bytes for the new character, then copy the rest. */
                    mch_memmove(s, res, p - res);
                    STRCPY(s + (p - res) + newl, p + l);
                    p = s + (p - res);
                    vim_free(res);
                    res = s;
                }

                utf_char2bytes(lc, p);
                p += newl;
            }
            else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
                p += l;         /* skip multi-byte character */
            else
# endif
            {
                *p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */
                p++;
            }
        }

    return res;
}
#endif #endif
/* /*

View File

@@ -40,6 +40,7 @@ char_u *vim_strsave_up(char_u *string);
char_u *vim_strnsave_up(char_u *string, int len); char_u *vim_strnsave_up(char_u *string, int len);
void vim_strup(char_u *p); void vim_strup(char_u *p);
char_u *strup_save(char_u *orig); char_u *strup_save(char_u *orig);
char_u *strlow_save(char_u *orig);
void del_trailing_spaces(char_u *ptr); void del_trailing_spaces(char_u *ptr);
void vim_strncpy(char_u *to, char_u *from, size_t len); void vim_strncpy(char_u *to, char_u *from, size_t len);
void vim_strcat(char_u *to, char_u *from, size_t tosize); void vim_strcat(char_u *to, char_u *from, size_t tosize);

View File

@@ -16,3 +16,148 @@ func Test_str2nr()
call assert_equal(123456789, str2nr('123456789')) call assert_equal(123456789, str2nr('123456789'))
call assert_equal(-123456789, str2nr('-123456789')) call assert_equal(-123456789, str2nr('-123456789'))
endfunc endfunc
" Test tolower(): the empty string, all printable ASCII characters and, when
" the multi_byte feature is available, a selection of letters with diacritics.
" NOTE(review): the non-ASCII checks presumably rely on 'encoding' being
" utf-8 when this file is sourced - confirm against the test runner.
func Test_tolower()
call assert_equal("", tolower(""))
" Test with all printable ASCII characters.
call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
\ tolower(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
" The remaining checks use non-ASCII characters, skip them without multi_byte.
if !has('multi_byte')
return
endif
" Test with a few uppercase diacritics.
call assert_equal("aàáâãäåāăąǎǟǡả", tolower("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
call assert_equal("bḃḇ", tolower("BḂḆ"))
call assert_equal("cçćĉċč", tolower("CÇĆĈĊČ"))
call assert_equal("dďđḋḏḑ", tolower("DĎĐḊḎḐ"))
call assert_equal("eèéêëēĕėęěẻẽ", tolower("EÈÉÊËĒĔĖĘĚẺẼ"))
call assert_equal("fḟ ", tolower("FḞ "))
call assert_equal("gĝğġģǥǧǵḡ", tolower("GĜĞĠĢǤǦǴḠ"))
call assert_equal("hĥħḣḧḩ", tolower("HĤĦḢḦḨ"))
call assert_equal("iìíîïĩīĭįiǐỉ", tolower("IÌÍÎÏĨĪĬĮİǏỈ"))
call assert_equal("jĵ", tolower("JĴ"))
call assert_equal("kķǩḱḵ", tolower("KĶǨḰḴ"))
call assert_equal("lĺļľŀłḻ", tolower("LĹĻĽĿŁḺ"))
call assert_equal("mḿṁ", tolower("MḾṀ"))
call assert_equal("nñńņňṅṉ", tolower("NÑŃŅŇṄṈ"))
call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
call assert_equal("pṕṗ", tolower("PṔṖ"))
call assert_equal("q", tolower("Q"))
call assert_equal("rŕŗřṙṟ", tolower("RŔŖŘṘṞ"))
call assert_equal("sśŝşšṡ", tolower("SŚŜŞŠṠ"))
call assert_equal("tţťŧṫṯ", tolower("TŢŤŦṪṮ"))
call assert_equal("uùúûüũūŭůűųưǔủ", tolower("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
call assert_equal("vṽ", tolower("VṼ"))
call assert_equal("wŵẁẃẅẇ", tolower("WŴẀẂẄẆ"))
call assert_equal("xẋẍ", tolower("XẊẌ"))
call assert_equal("yýŷÿẏỳỷỹ", tolower("YÝŶŸẎỲỶỸ"))
call assert_equal("zźżžƶẑẕ", tolower("ZŹŻŽƵẐẔ"))
" Test with a few lowercase diacritics, which should remain unchanged.
call assert_equal("aàáâãäåāăąǎǟǡả", tolower("aàáâãäåāăąǎǟǡả"))
call assert_equal("bḃḇ", tolower("bḃḇ"))
call assert_equal("cçćĉċč", tolower("cçćĉċč"))
call assert_equal("dďđḋḏḑ", tolower("dďđḋḏḑ"))
call assert_equal("eèéêëēĕėęěẻẽ", tolower("eèéêëēĕėęěẻẽ"))
call assert_equal("fḟ", tolower("fḟ"))
call assert_equal("gĝğġģǥǧǵḡ", tolower("gĝğġģǥǧǵḡ"))
call assert_equal("hĥħḣḧḩẖ", tolower("hĥħḣḧḩẖ"))
call assert_equal("iìíîïĩīĭįǐỉ", tolower("iìíîïĩīĭįǐỉ"))
call assert_equal("jĵǰ", tolower("jĵǰ"))
call assert_equal("kķǩḱḵ", tolower("kķǩḱḵ"))
call assert_equal("lĺļľŀłḻ", tolower("lĺļľŀłḻ"))
call assert_equal("mḿṁ ", tolower("mḿṁ "))
call assert_equal("nñńņňʼnṅṉ", tolower("nñńņňʼnṅṉ"))
call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("oòóôõöøōŏőơǒǫǭỏ"))
call assert_equal("pṕṗ", tolower("pṕṗ"))
call assert_equal("q", tolower("q"))
call assert_equal("rŕŗřṙṟ", tolower("rŕŗřṙṟ"))
call assert_equal("sśŝşšṡ", tolower("sśŝşšṡ"))
call assert_equal("tţťŧṫṯẗ", tolower("tţťŧṫṯẗ"))
call assert_equal("uùúûüũūŭůűųưǔủ", tolower("uùúûüũūŭůűųưǔủ"))
call assert_equal("vṽ", tolower("vṽ"))
call assert_equal("wŵẁẃẅẇẘ", tolower("wŵẁẃẅẇẘ"))
call assert_equal("ẋẍ", tolower("ẋẍ"))
call assert_equal("yýÿŷẏẙỳỷỹ", tolower("yýÿŷẏẙỳỷỹ"))
call assert_equal("zźżžƶẑẕ", tolower("zźżžƶẑẕ"))
" According to https://twitter.com/jifa/status/625776454479970304
" Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase
" in length (2 to 3 bytes) when lowercased. So let's test them.
call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
endfunc
" Test toupper(): the empty string, all printable ASCII characters and, when
" the multi_byte feature is available, a selection of letters with diacritics,
" including characters whose byte count changes when made upper case.
func Test_toupper()
call assert_equal("", toupper(""))
" Test with all printable ASCII characters.
call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~',
\ toupper(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
" The remaining checks use non-ASCII characters, skip them without multi_byte.
if !has('multi_byte')
return
endif
" Test with a few lowercase diacritics.
call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("aàáâãäåāăąǎǟǡả"))
call assert_equal("BḂḆ", toupper("bḃḇ"))
call assert_equal("CÇĆĈĊČ", toupper("cçćĉċč"))
call assert_equal("DĎĐḊḎḐ", toupper("dďđḋḏḑ"))
call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("eèéêëēĕėęěẻẽ"))
call assert_equal("FḞ", toupper("fḟ"))
call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("gĝğġģǥǧǵḡ"))
call assert_equal("HĤĦḢḦḨẖ", toupper("hĥħḣḧḩẖ"))
call assert_equal("IÌÍÎÏĨĪĬĮǏỈ", toupper("iìíîïĩīĭįǐỉ"))
call assert_equal("JĴǰ", toupper("jĵǰ"))
call assert_equal("KĶǨḰḴ", toupper("kķǩḱḵ"))
call assert_equal("LĹĻĽĿŁḺ", toupper("lĺļľŀłḻ"))
call assert_equal("MḾṀ ", toupper("mḿṁ "))
call assert_equal("NÑŃŅŇʼnṄṈ", toupper("nñńņňʼnṅṉ"))
call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("oòóôõöøōŏőơǒǫǭỏ"))
call assert_equal("PṔṖ", toupper("pṕṗ"))
call assert_equal("Q", toupper("q"))
call assert_equal("RŔŖŘṘṞ", toupper("rŕŗřṙṟ"))
call assert_equal("SŚŜŞŠṠ", toupper("sśŝşšṡ"))
call assert_equal("TŢŤŦṪṮẗ", toupper("tţťŧṫṯẗ"))
call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("uùúûüũūŭůűųưǔủ"))
call assert_equal("VṼ", toupper("vṽ"))
call assert_equal("WŴẀẂẄẆẘ", toupper("wŵẁẃẅẇẘ"))
call assert_equal("ẊẌ", toupper("ẋẍ"))
call assert_equal("YÝŸŶẎẙỲỶỸ", toupper("yýÿŷẏẙỳỷỹ"))
call assert_equal("ZŹŻŽƵẐẔ", toupper("zźżžƶẑẕ"))
" Test with a few uppercase diacritics, which should remain unchanged.
call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
call assert_equal("BḂḆ", toupper("BḂḆ"))
call assert_equal("CÇĆĈĊČ", toupper("CÇĆĈĊČ"))
call assert_equal("DĎĐḊḎḐ", toupper("DĎĐḊḎḐ"))
call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("EÈÉÊËĒĔĖĘĚẺẼ"))
call assert_equal("FḞ ", toupper("FḞ "))
call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("GĜĞĠĢǤǦǴḠ"))
call assert_equal("HĤĦḢḦḨ", toupper("HĤĦḢḦḨ"))
call assert_equal("IÌÍÎÏĨĪĬĮİǏỈ", toupper("IÌÍÎÏĨĪĬĮİǏỈ"))
call assert_equal("JĴ", toupper("JĴ"))
call assert_equal("KĶǨḰḴ", toupper("KĶǨḰḴ"))
call assert_equal("LĹĻĽĿŁḺ", toupper("LĹĻĽĿŁḺ"))
call assert_equal("MḾṀ", toupper("MḾṀ"))
call assert_equal("NÑŃŅŇṄṈ", toupper("NÑŃŅŇṄṈ"))
call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
call assert_equal("PṔṖ", toupper("PṔṖ"))
call assert_equal("Q", toupper("Q"))
call assert_equal("RŔŖŘṘṞ", toupper("RŔŖŘṘṞ"))
call assert_equal("SŚŜŞŠṠ", toupper("SŚŜŞŠṠ"))
call assert_equal("TŢŤŦṪṮ", toupper("TŢŤŦṪṮ"))
call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
call assert_equal("VṼ", toupper("VṼ"))
call assert_equal("WŴẀẂẄẆ", toupper("WŴẀẂẄẆ"))
call assert_equal("XẊẌ", toupper("XẊẌ"))
call assert_equal("YÝŶŸẎỲỶỸ", toupper("YÝŶŸẎỲỶỸ"))
call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ"))
" ⱥ (U+2C65) and ⱦ (U+2C66) take 3 bytes in UTF-8 while their uppercase
" forms Ⱥ (U+023A) and Ⱦ (U+023E) take only 2, so this checks that toupper()
" copes with a character whose byte count shrinks.
call assert_equal("Ⱥ Ⱦ", toupper("ⱥ ⱦ"))
endfunc

View File

@@ -764,6 +764,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
243,
/**/ /**/
242, 242,
/**/ /**/