1
0
forked from aniani/vim

patch 8.2.3139: functions for string manipulation are spread out

Problem:    Functions for string manipulation are spread out.
Solution:   Move string related functions to a new source file. (Yegappan
            Lakshmanan, closes #8470)
This commit is contained in:
Yegappan Lakshmanan
2021-07-10 21:29:18 +02:00
committed by Bram Moolenaar
parent 31e21766d6
commit a2438132a6
21 changed files with 1673 additions and 1627 deletions

View File

@@ -1267,42 +1267,6 @@ free_all_mem(void)
}
#endif
/*
 * Return a copy of the NUL-terminated "string" in allocated memory.
 * The result is whatever alloc() returned, thus NULL when it failed.
 */
char_u *
vim_strsave(char_u *string)
{
    size_t  nbytes = STRLEN(string) + 1;    // text plus the trailing NUL
    char_u  *copy = alloc(nbytes);

    if (copy == NULL)
        return NULL;
    mch_memmove(copy, string, nbytes);
    return copy;
}
/*
 * Save at most "len" bytes of "string" in allocated memory and terminate the
 * copy with a NUL.
 * "len + 1" bytes are always requested from alloc(), also when "string" is
 * shorter than "len".
 * Returns NULL when alloc() failed.
 */
char_u *
vim_strnsave(char_u *string, size_t len)
{
    char_u *copy = alloc(len + 1);

    if (copy == NULL)
        return NULL;
    STRNCPY(copy, string, len);
    copy[len] = NUL;
    return copy;
}
/*
* Copy "p[len]" into allocated memory, ignoring NUL characters.
* Returns NULL when out of memory.
@@ -1317,465 +1281,6 @@ vim_memsave(char_u *p, size_t len)
return ret;
}
/*
 * Like vim_strsave(), but put a backslash before every character that is
 * found in "esc_chars".
 * This is vim_strsave_escaped_ext() with a backslash as the escape character
 * and "bsl" FALSE.
 */
char_u *
vim_strsave_escaped(char_u *string, char_u *esc_chars)
{
    int escape_char = '\\';
    int bsl = FALSE;

    return vim_strsave_escaped_ext(string, esc_chars, escape_char, bsl);
}
/*
 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
 * characters where rem_backslash() would remove the backslash.
 * Escape the characters with "cc".
 * When "has_mbyte" is set a multi-byte character is copied as a whole and
 * never escaped.
 * Returns the result in allocated memory, NULL when alloc() failed.
 */
char_u *
vim_strsave_escaped_ext(
    char_u  *string,
    char_u  *esc_chars,
    int     cc,
    int     bsl)
{
    char_u  *p;
    char_u  *p2;
    char_u  *escaped_string;
    size_t  length;     // size_t like the argument of alloc(): an "unsigned"
                        // count can wrap around for a very long string,
                        // which would make the buffer too small
    int     l;

    // First pass: count the bytes needed, including one for every escape
    // character that will be inserted.
    length = 1;                         // count the trailing NUL
    for (p = string; *p; p++)
    {
        if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
        {
            length += l;                // count a multibyte char
            p += l - 1;
            continue;
        }
        if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
            ++length;                   // count the escape character
        ++length;                       // count an ordinary char
    }

    escaped_string = alloc(length);
    if (escaped_string == NULL)
        return NULL;

    // Second pass: copy the text, making the same decisions as above.
    p2 = escaped_string;
    for (p = string; *p; p++)
    {
        if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
        {
            mch_memmove(p2, p, (size_t)l);
            p2 += l;
            p += l - 1;                 // skip multibyte char
            continue;
        }
        if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
            *p2++ = cc;
        *p2++ = *p;
    }
    *p2 = NUL;

    return escaped_string;
}
/*
 * Return non-zero when the tail of the 'shell' option value contains "csh",
 * zero otherwise.
 */
int
csh_like_shell(void)
{
    char *shell_tail = (char *)gettail(p_sh);
    char *match = strstr(shell_tail, "csh");

    return match != NULL;
}
/*
 * Escape "string" for use as a shell argument with system().
 * This uses single quotes, except when we know we need to use double quotes
 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
 * PowerShell also uses a novel escaping for enclosed single quotes - double
 * them up.
 * Escape a newline, depending on the 'shell' option.
 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
 * with "<" like "<cfile>".
 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
 * Returns the result in allocated memory, NULL if we have run out.
 *
 * Works in two passes over "string": the first pass computes the number of
 * bytes to allocate, the second pass fills the buffer.  The second pass must
 * never write more bytes for a character than the first pass counted.
 */
char_u *
vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
{
// number of bytes to allocate for the result
unsigned length;
// read position in "string"
char_u *p;
// write position in "escaped_string"
char_u *d;
char_u *escaped_string;
// length of the item found by find_cmdline_var()
int l;
int csh_like;
char_u *shname;
int powershell;
# ifdef MSWIN
// TRUE when the result is enclosed in double quotes instead of single quotes
int double_quotes;
# endif
// Only csh and similar shells expand '!' within single quotes. For sh and
// the like we must not put a backslash before it, it will be taken
// literally. If do_special is set the '!' will be escaped twice.
// Csh also needs to have "\n" escaped twice when do_special is set.
csh_like = csh_like_shell();
// PowerShell uses its own version for quoting single quotes
shname = gettail(p_sh);
powershell = strstr((char *)shname, "pwsh") != NULL;
# ifdef MSWIN
powershell = powershell || strstr((char *)shname, "powershell") != NULL;
// PowerShell only accepts single quotes so override shellslash.
double_quotes = !powershell && !p_ssl;
# endif
// First count the number of extra bytes required.
length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
for (p = string; *p != NUL; MB_PTR_ADV(p))
{
# ifdef MSWIN
if (double_quotes)
{
if (*p == '"')
++length; // " -> ""
}
else
# endif
// Without MSWIN this "if" stands alone, with MSWIN it is the "else" branch
// of the double_quotes check above.
if (*p == '\'')
{
// The quote itself is already included in STRLEN(string), only the
// additional bytes are counted.  For PowerShell this counts one byte more
// than the fill loop below writes, the buffer is just a bit larger.
if (powershell)
length +=2; // ' => ''
else
length += 3; // ' => '\''
}
if ((*p == '\n' && (csh_like || do_newline))
|| (*p == '!' && (csh_like || do_special)))
{
++length; // insert backslash
if (csh_like && do_special)
++length; // insert backslash
}
// NOTE(review): find_cmdline_var() presumably returns >= 0 when a special
// item such as "%" or "<cfile>" starts at "p" and stores its length in "l";
// this is inferred from how "l" is used here - confirm.
if (do_special && find_cmdline_var(p, &l) >= 0)
{
++length; // insert backslash
// Move to the last byte of the item, MB_PTR_ADV() in the loop header then
// advances past it.
p += l - 1;
}
}
// Allocate memory for the result and fill it.
escaped_string = alloc(length);
if (escaped_string != NULL)
{
d = escaped_string;
// add opening quote
# ifdef MSWIN
if (double_quotes)
*d++ = '"';
else
# endif
*d++ = '\'';
// No increment in the loop header: every branch below advances "p" itself.
for (p = string; *p != NUL; )
{
# ifdef MSWIN
if (double_quotes)
{
if (*p == '"')
{
// double the double quote
*d++ = '"';
*d++ = '"';
++p;
continue;
}
}
else
# endif
if (*p == '\'')
{
if (powershell)
{
// PowerShell: double the single quote
*d++ = '\'';
*d++ = '\'';
}
else
{
// close the quoted text, add an escaped quote and open the quoted text
// again: ' => '\''
*d++ = '\'';
*d++ = '\\';
*d++ = '\'';
*d++ = '\'';
}
++p;
continue;
}
if ((*p == '\n' && (csh_like || do_newline))
|| (*p == '!' && (csh_like || do_special)))
{
*d++ = '\\';
// csh with do_special: escape twice
if (csh_like && do_special)
*d++ = '\\';
*d++ = *p++;
continue;
}
if (do_special && find_cmdline_var(p, &l) >= 0)
{
*d++ = '\\'; // insert backslash
while (--l >= 0) // copy the var
*d++ = *p++;
continue;
}
// Anything else is copied as-is.  MB_COPY_CHAR() has to advance "p", since
// nothing else does on this path; presumably it copies one (possibly
// multi-byte) character and advances both pointers - confirm.
MB_COPY_CHAR(p, d);
}
// add terminating quote and finish with a NUL
# ifdef MSWIN
if (double_quotes)
*d++ = '"';
else
# endif
*d++ = '\'';
*d = NUL;
}
return escaped_string;
}
/*
 * Return a copy of "string" in allocated memory, with the copy passed through
 * vim_strup() (ASCII lower-to-upper case translation, language independent).
 * The result of vim_strsave() is handed to vim_strup() without a check and
 * then returned as-is.
 */
char_u *
vim_strsave_up(char_u *string)
{
    char_u *copy = vim_strsave(string);

    vim_strup(copy);
    return copy;
}
/*
 * Return a copy made with vim_strnsave() of "string" and "len", with the copy
 * passed through vim_strup() (ASCII lower-to-upper case translation, language
 * independent).
 * The result of vim_strnsave() is handed to vim_strup() without a check and
 * then returned as-is.
 */
char_u *
vim_strnsave_up(char_u *string, size_t len)
{
    char_u *copy = vim_strnsave(string, len);

    vim_strup(copy);
    return copy;
}
/*
 * Make the NUL-terminated string "p" upper case in place, using ASCII
 * lower-to-upper case translation, language independent.
 * Does nothing when "p" is NULL.
 */
void
vim_strup(
    char_u  *p)
{
    char_u  *s;
    int     c;

    if (p == NULL)
        return;

    for (s = p; (c = *s) != NUL; ++s)
    {
#ifdef EBCDIC
        *s = isalpha(c) ? toupper(c) : c;
#else
        // 'a' to 'z' are 0x20 above 'A' to 'Z'; other bytes are written back
        // unchanged
        *s = (c >= 'a' && c <= 'z') ? (c - 0x20) : c;
#endif
    }
}
#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * Return an upper-case copy of "orig" in allocated memory.
 * With UTF-8 encoding every character is converted with utf_toupper(); when
 * the result takes a different number of bytes the copy is reallocated.
 * With another multi-byte encoding multi-byte characters are skipped.
 * Single bytes are converted with TOUPPER_LOC().
 * Returns NULL when out of memory.
 */
char_u *
strup_save(char_u *orig)
{
    char_u  *res = vim_strsave(orig);
    char_u  *cur = res;             // position in "res" being converted

    if (res == NULL)
        return NULL;

    while (*cur != NUL)
    {
        int     oldlen;             // byte length of the character at "cur"
        int     newlen;             // byte length of the converted character
        int     c, uc;
        char_u  *grown;
        size_t  off;

        if (!enc_utf8)
        {
            if (has_mbyte && (oldlen = (*mb_ptr2len)(cur)) > 1)
                cur += oldlen;      // skip multi-byte character
            else
            {
                *cur = TOUPPER_LOC(*cur);   // note that toupper() can be a macro
                cur++;
            }
            continue;
        }

        c = utf_ptr2char(cur);
        oldlen = utf_ptr2len(cur);
        if (c == 0)
        {
            // overlong sequence, use only the first byte
            c = *cur;
            oldlen = 1;
        }
        uc = utf_toupper(c);
        newlen = utf_char2len(uc);

        if (newlen != oldlen)
        {
            // The byte count changes, the text after "cur" has to move.
            // This is rare, thus it's OK to do another malloc()/free().
            grown = alloc(STRLEN(res) + 1 + newlen - oldlen);
            if (grown == NULL)
            {
                vim_free(res);
                return NULL;
            }
            off = (size_t)(cur - res);
            mch_memmove(grown, res, off);
            STRCPY(grown + off + newlen, cur + oldlen);
            cur = grown + off;
            vim_free(res);
            res = grown;
        }

        utf_char2bytes(uc, cur);
        cur += newlen;
    }

    return res;
}
/*
 * Return a lower-case copy of "orig" in allocated memory.
 * With UTF-8 encoding every character is converted with utf_tolower(); when
 * the result takes a different number of bytes the copy is reallocated.
 * With another multi-byte encoding multi-byte characters are skipped.
 * Single bytes are converted with TOLOWER_LOC().
 * Returns NULL when out of memory.
 */
char_u *
strlow_save(char_u *orig)
{
    char_u  *res = vim_strsave(orig);
    char_u  *cur = res;             // position in "res" being converted

    if (res == NULL)
        return NULL;

    while (*cur != NUL)
    {
        int     oldlen;             // byte length of the character at "cur"
        int     newlen;             // byte length of the converted character
        int     c, lc;
        char_u  *grown;
        size_t  off;

        if (!enc_utf8)
        {
            if (has_mbyte && (oldlen = (*mb_ptr2len)(cur)) > 1)
                cur += oldlen;      // skip multi-byte character
            else
            {
                *cur = TOLOWER_LOC(*cur);   // note that tolower() can be a macro
                cur++;
            }
            continue;
        }

        c = utf_ptr2char(cur);
        oldlen = utf_ptr2len(cur);
        if (c == 0)
        {
            // overlong sequence, use only the first byte
            c = *cur;
            oldlen = 1;
        }
        lc = utf_tolower(c);
        newlen = utf_char2len(lc);

        if (newlen != oldlen)
        {
            // The byte count changes, the text after "cur" has to move.
            // This is rare, thus it's OK to do another malloc()/free().
            grown = alloc(STRLEN(res) + 1 + newlen - oldlen);
            if (grown == NULL)
            {
                vim_free(res);
                return NULL;
            }
            off = (size_t)(cur - res);
            mch_memmove(grown, res, off);
            STRCPY(grown + off + newlen, cur + oldlen);
            cur = grown + off;
            vim_free(res);
            res = grown;
        }

        utf_char2bytes(lc, cur);
        cur += newlen;
    }

    return res;
}
#endif
/*
 * Delete white space (as defined by VIM_ISWHITE()) at the end of the string
 * "ptr", in place.
 * A white space character that directly follows a backslash or CTRL-V is
 * kept, and the first character of the string is never removed.
 */
void
del_trailing_spaces(char_u *ptr)
{
    size_t len = STRLEN(ptr);

    // Work with an index instead of a decremented pointer: for an empty
    // string the pointer would end up before the start of "ptr", which is
    // undefined behavior.
    while (len > 1 && VIM_ISWHITE(ptr[len - 1])
            && ptr[len - 2] != '\\' && ptr[len - 2] != Ctrl_V)
        ptr[--len] = NUL;
}
/*
 * Like strncpy(), but the result is always terminated: a NUL is stored in
 * "to[len]" after copying.
 * "to" must be "len + 1" long!
 */
void
vim_strncpy(char_u *to, char_u *from, size_t len)
{
    char_u *end = to + len;     // where the terminating NUL goes

    STRNCPY(to, from, len);
    *end = NUL;
}
/*
 * Like strcat(), but make sure the result fits in "tosize" bytes and is
 * always NUL terminated.  "from" and "to" may overlap.
 * When the text in "to" is already "tosize" bytes or longer (not counting the
 * NUL) there is no room to append anything and "to" is left unchanged.
 */
void
vim_strcat(char_u *to, char_u *from, size_t tosize)
{
    size_t tolen = STRLEN(to);
    size_t fromlen = STRLEN(from);

    // Without this check "tosize - tolen - 1" below wraps around to a huge
    // value, and "tosize - 1" is not a valid index when "tosize" is zero.
    if (tolen >= tosize)
        return;

    if (tolen + fromlen + 1 > tosize)
    {
        // "from" does not fit completely: copy what fits and terminate.
        mch_memmove(to + tolen, from, tosize - tolen - 1);
        to[tosize - 1] = NUL;
    }
    else
        mch_memmove(to + tolen, from, fromlen + 1);     // includes the NUL
}
/*
* Isolate one part of a string option where parts are separated with
* "sep_chars".
@@ -1848,180 +1353,6 @@ vim_memset(void *ptr, int c, size_t size)
}
#endif
#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
/*
 * Compare the strings "s1" and "s2" while ignoring case; each character is
 * passed through TOLOWER_LOC() before comparing.
 * Doesn't work for multi-byte characters.
 * Returns 0 when the strings match, otherwise the difference between the
 * first pair of lower-cased characters that differ: < 0 for smaller, > 0 for
 * bigger.
 */
int
vim_stricmp(char *s1, char *s2)
{
    int diff;

    while ((diff = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2)) == 0)
    {
        if (*s1 == NUL)
            return 0;       // reached the end of both strings: match
        ++s1;
        ++s2;
    }
    return diff;            // this character is different
}
#endif
#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
/*
 * Compare at most "len" characters of the strings "s1" and "s2" while
 * ignoring case; each character is passed through TOLOWER_LOC() before
 * comparing.
 * Doesn't work for multi-byte characters.
 * Returns 0 when the compared part matches, otherwise the difference between
 * the first pair of lower-cased characters that differ: < 0 for smaller, > 0
 * for bigger.
 */
int
vim_strnicmp(char *s1, char *s2, size_t len)
{
    size_t i;

    for (i = 0; i < len; ++i)
    {
        int diff = (int)TOLOWER_LOC(s1[i]) - (int)TOLOWER_LOC(s2[i]);

        if (diff != 0)
            return diff;    // this character is different
        if (s1[i] == NUL)
            break;          // strings match until NUL
    }
    return 0;               // strings match
}
#endif
/*
 * Search for the first occurrence of "c" in "string".
 * Version of strchr() that handles unsigned char strings with characters from
 * 128 to 255 correctly.  It also doesn't return a pointer to the NUL at the
 * end of the string.
 * Four cases are handled separately:
 * - UTF-8 encoding and "c" is 0x80 or higher: compare decoded characters;
 * - DBCS encoding and "c" is above 255: compare the two bytes of "c";
 * - other multi-byte text: compare the first byte of each character;
 * - otherwise: compare every byte.
 * Returns a pointer to the match or NULL when there is none.
 */
char_u *
vim_strchr(char_u *string, int c)
{
    char_u *s = string;

    if (enc_utf8 && c >= 0x80)
    {
        while (*s != NUL)
        {
            int clen = utfc_ptr2len(s);

            // Avoid matching an illegal byte here.
            if (utf_ptr2char(s) == c && clen > 1)
                return s;
            s += clen;
        }
        return NULL;
    }

    if (enc_dbcs != 0 && c > 255)
    {
        // "c" holds two bytes: the upper one comes first in the text
        int first = ((unsigned)c >> 8) & 0xff;
        int second = c & 0xff;

        for ( ; *s != NUL; s += (*mb_ptr2len)(s))
            if (*s == first && s[1] == second)
                return s;
        return NULL;
    }

    if (has_mbyte)
    {
        // step over whole characters, so that only a first byte can match
        for ( ; *s != NUL; s += (*mb_ptr2len)(s))
            if (*s == c)
                return s;
        return NULL;
    }

    for ( ; *s != NUL; ++s)
        if (*s == c)
            return s;
    return NULL;
}
/*
 * Search for the first byte equal to "c" in "string".
 * Version of strchr() that only works for bytes and handles unsigned char
 * strings with characters above 128 correctly.  It also doesn't return a
 * pointer to the NUL at the end of the string.
 * Returns a pointer to the byte or NULL when it is not found.
 */
char_u *
vim_strbyte(char_u *string, int c)
{
    char_u *s;

    for (s = string; *s != NUL; ++s)
        if (*s == c)
            return s;
    return NULL;
}
/*
 * Search for the last occurrence of "c" in "string".
 * Version of strrchr() that handles unsigned char strings with characters
 * from 128 to 255 correctly.  It also doesn't return a pointer to the NUL at
 * the end of the string.
 * The string is walked with MB_PTR_ADV() and the byte at each position is
 * compared with "c".
 * Return NULL if not found.
 * Does not handle multi-byte char for "c"!
 */
char_u *
vim_strrchr(char_u *string, int c)
{
    char_u *last = NULL;        // last match seen so far
    char_u *s;

    for (s = string; *s; MB_PTR_ADV(s))
        if (*s == c)
            last = s;
    return last;
}
/*
 * Vim's version of strpbrk(), in case it's missing: return a pointer to the
 * first position in "s", walking with MB_PTR_ADV(), where vim_strchr() finds
 * the byte in "charset".  Returns NULL when there is no such position.
 * Don't generate a prototype for this, causes problems when it's not used.
 */
#ifndef PROTO
# ifndef HAVE_STRPBRK
#  ifdef vim_strpbrk
#   undef vim_strpbrk
#  endif
char_u *
vim_strpbrk(char_u *s, char_u *charset)
{
    for ( ; *s; MB_PTR_ADV(s))
        if (vim_strchr(charset, *s) != NULL)
            return s;
    return NULL;
}
# endif
#endif
/*
* Vim has its own isspace() function, because on some machines isspace()
* can't handle characters above 128.
@@ -3974,25 +3305,6 @@ qsort(
}
#endif
/*
 * Sort an array of strings.
 */
static int sort_compare(const void *s1, const void *s2);

/*
 * Comparison function for qsort(): "s1" and "s2" each point to an array
 * item, which is a pointer to a string.  The strings are compared with
 * STRCMP().
 */
static int
sort_compare(const void *s1, const void *s2)
{
    char *left = *(char **)s1;
    char *right = *(char **)s2;

    return STRCMP(left, right);
}

/*
 * Sort the "count" string pointers in "files" in place, using qsort() with
 * sort_compare().
 */
void
sort_strings(
    char_u  **files,
    int     count)
{
    size_t nitems = (size_t)count;

    qsort((void *)files, nitems, sizeof(char_u *), sort_compare);
}
/*
* The putenv() implementation below comes from the "screen" program.
* Included with permission from Juergen Weigert.
@@ -4304,24 +3616,6 @@ put_bytes(FILE *fd, long_u nr, int len)
#endif
#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * Return TRUE if string "s" contains a byte with value 128 or higher, thus a
 * non-ASCII character.
 * Return FALSE otherwise, also when "s" is NULL.
 */
int
has_non_ascii(char_u *s)
{
    char_u *p = s;

    if (p == NULL)
        return FALSE;
    while (*p != NUL)
        if (*p++ >= 128)
            return TRUE;
    return FALSE;
}
#endif
#ifndef PROTO // proto is defined in vim.h
# ifdef ELAPSED_TIMEVAL
/*