patch 8.2.3139: functions for string manipulation are spread out

Problem: Functions for string manipulation are spread out.
Solution: Move string-related functions to a new source file. (Yegappan Lakshmanan, closes #8470)
2021-07-10 21:29:18 +02:00
parent 31e21766d6
commit a2438132a6
21 changed files with 1673 additions and 1627 deletions
--- a/src/misc2.c
+++ b/src/misc2.c
@@ -1267,42 +1267,6 @@ free_all_mem(void)
 }
 #endif

-/*
- * Copy "string" into newly allocated memory.
- */
-    char_u *
-vim_strsave(char_u *string)
-{
-    char_u	*p;
-    size_t	len;
-
-    len = STRLEN(string) + 1;
-    p = alloc(len);
-    if (p != NULL)
-	mch_memmove(p, string, len);
-    return p;
-}
-
-/*
- * Copy up to "len" bytes of "string" into newly allocated memory and
- * terminate with a NUL.
- * The allocated memory always has size "len + 1", also when "string" is
- * shorter.
- */
-    char_u *
-vim_strnsave(char_u *string, size_t len)
-{
-    char_u	*p;
-
-    p = alloc(len + 1);
-    if (p != NULL)
-    {
-	STRNCPY(p, string, len);
-	p[len] = NUL;
-    }
-    return p;
-}
-
 /*
 * Copy "p[len]" into allocated memory, ignoring NUL characters.
 * Returns NULL when out of memory.
@@ -1317,465 +1281,6 @@ vim_memsave(char_u *p, size_t len)
    return ret;
 }

-/*
- * Same as vim_strsave(), but any characters found in esc_chars are preceded
- * by a backslash.
- */
-    char_u *
-vim_strsave_escaped(char_u *string, char_u *esc_chars)
-{
-    return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
-}
-
-/*
- * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
- * characters where rem_backslash() would remove the backslash.
- * Escape the characters with "cc".
- */
-    char_u *
-vim_strsave_escaped_ext(
-    char_u	*string,
-    char_u	*esc_chars,
-    int		cc,
-    int		bsl)
-{
-    char_u	*p;
-    char_u	*p2;
-    char_u	*escaped_string;
-    unsigned	length;
-    int		l;
-
-    /*
-     * First count the number of backslashes required.
-     * Then allocate the memory and insert them.
-     */
-    length = 1;				// count the trailing NUL
-    for (p = string; *p; p++)
-    {
-	if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
-	{
-	    length += l;		// count a multibyte char
-	    p += l - 1;
-	    continue;
-	}
-	if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
-	    ++length;			// count a backslash
-	++length;			// count an ordinary char
-    }
-    escaped_string = alloc(length);
-    if (escaped_string != NULL)
-    {
-	p2 = escaped_string;
-	for (p = string; *p; p++)
-	{
-	    if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
-	    {
-		mch_memmove(p2, p, (size_t)l);
-		p2 += l;
-		p += l - 1;		// skip multibyte char
-		continue;
-	    }
-	    if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
-		*p2++ = cc;
-	    *p2++ = *p;
-	}
-	*p2 = NUL;
-    }
-    return escaped_string;
-}
-
-/*
- * Return TRUE when 'shell' has "csh" in the tail.
- */
-    int
-csh_like_shell(void)
-{
-    return (strstr((char *)gettail(p_sh), "csh") != NULL);
-}
-
-/*
- * Escape "string" for use as a shell argument with system().
- * This uses single quotes, except when we know we need to use double quotes
- * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
- * PowerShell also uses a novel escaping for enclosed single quotes - double
- * them up.
- * Escape a newline, depending on the 'shell' option.
- * When "do_special" is TRUE also replace "!", "%", "#" and things starting
- * with "<" like "<cfile>".
- * When "do_newline" is FALSE do not escape newline unless it is csh shell.
- * Returns the result in allocated memory, NULL if we have run out.
- */
-    char_u *
-vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
-{
-    unsigned	length;
-    char_u	*p;
-    char_u	*d;
-    char_u	*escaped_string;
-    int		l;
-    int		csh_like;
-    char_u	*shname;
-    int		powershell;
-# ifdef MSWIN
-    int		double_quotes;
-# endif
-
-    // Only csh and similar shells expand '!' within single quotes.  For sh and
-    // the like we must not put a backslash before it, it will be taken
-    // literally.  If do_special is set the '!' will be escaped twice.
-    // Csh also needs to have "\n" escaped twice when do_special is set.
-    csh_like = csh_like_shell();
-
-    // PowerShell uses it's own version for quoting single quotes
-    shname = gettail(p_sh);
-    powershell = strstr((char *)shname, "pwsh") != NULL;
-# ifdef MSWIN
-    powershell = powershell || strstr((char *)shname, "powershell") != NULL;
-    // PowerShell only accepts single quotes so override shellslash.
-    double_quotes = !powershell && !p_ssl;
-# endif
-
-    // First count the number of extra bytes required.
-    length = (unsigned)STRLEN(string) + 3;  // two quotes and a trailing NUL
-    for (p = string; *p != NUL; MB_PTR_ADV(p))
-    {
-# ifdef MSWIN
-	if (double_quotes)
-	{
-	    if (*p == '"')
-		++length;		// " -> ""
-	}
-	else
-# endif
-	if (*p == '\'')
-	{
-	    if (powershell)
-		length +=2;		// ' => ''
-	    else
-		length += 3;		// ' => '\''
-	}
-	if ((*p == '\n' && (csh_like || do_newline))
-		|| (*p == '!' && (csh_like || do_special)))
-	{
-	    ++length;			// insert backslash
-	    if (csh_like && do_special)
-		++length;		// insert backslash
-	}
-	if (do_special && find_cmdline_var(p, &l) >= 0)
-	{
-	    ++length;			// insert backslash
-	    p += l - 1;
-	}
-    }
-
-    // Allocate memory for the result and fill it.
-    escaped_string = alloc(length);
-    if (escaped_string != NULL)
-    {
-	d = escaped_string;
-
-	// add opening quote
-# ifdef MSWIN
-	if (double_quotes)
-	    *d++ = '"';
-	else
-# endif
-	    *d++ = '\'';
-
-	for (p = string; *p != NUL; )
-	{
-# ifdef MSWIN
-	    if (double_quotes)
-	    {
-		if (*p == '"')
-		{
-		    *d++ = '"';
-		    *d++ = '"';
-		    ++p;
-		    continue;
-		}
-	    }
-	    else
-# endif
-	    if (*p == '\'')
-	    {
-		if (powershell)
-		{
-		    *d++ = '\'';
-		    *d++ = '\'';
-		}
-		else
-		{
-		    *d++ = '\'';
-		    *d++ = '\\';
-		    *d++ = '\'';
-		    *d++ = '\'';
-		}
-		++p;
-		continue;
-	    }
-	    if ((*p == '\n' && (csh_like || do_newline))
-		    || (*p == '!' && (csh_like || do_special)))
-	    {
-		*d++ = '\\';
-		if (csh_like && do_special)
-		    *d++ = '\\';
-		*d++ = *p++;
-		continue;
-	    }
-	    if (do_special && find_cmdline_var(p, &l) >= 0)
-	    {
-		*d++ = '\\';		// insert backslash
-		while (--l >= 0)	// copy the var
-		    *d++ = *p++;
-		continue;
-	    }
-
-	    MB_COPY_CHAR(p, d);
-	}
-
-	// add terminating quote and finish with a NUL
-# ifdef MSWIN
-	if (double_quotes)
-	    *d++ = '"';
-	else
-# endif
-	    *d++ = '\'';
-	*d = NUL;
-    }
-
-    return escaped_string;
-}
-
-/*
- * Like vim_strsave(), but make all characters uppercase.
- * This uses ASCII lower-to-upper case translation, language independent.
- */
-    char_u *
-vim_strsave_up(char_u *string)
-{
-    char_u *p1;
-
-    p1 = vim_strsave(string);
-    vim_strup(p1);
-    return p1;
-}
-
-/*
- * Like vim_strnsave(), but make all characters uppercase.
- * This uses ASCII lower-to-upper case translation, language independent.
- */
-    char_u *
-vim_strnsave_up(char_u *string, size_t len)
-{
-    char_u *p1;
-
-    p1 = vim_strnsave(string, len);
-    vim_strup(p1);
-    return p1;
-}
-
-/*
- * ASCII lower-to-upper case translation, language independent.
- */
-    void
-vim_strup(
-    char_u	*p)
-{
-    char_u  *p2;
-    int	    c;
-
-    if (p != NULL)
-    {
-	p2 = p;
-	while ((c = *p2) != NUL)
-#ifdef EBCDIC
-	    *p2++ = isalpha(c) ? toupper(c) : c;
-#else
-	    *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
-#endif
-    }
-}
-
-#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
-/*
- * Make string "s" all upper-case and return it in allocated memory.
- * Handles multi-byte characters as well as possible.
- * Returns NULL when out of memory.
- */
-    char_u *
-strup_save(char_u *orig)
-{
-    char_u	*p;
-    char_u	*res;
-
-    res = p = vim_strsave(orig);
-
-    if (res != NULL)
-	while (*p != NUL)
-	{
-	    int		l;
-
-	    if (enc_utf8)
-	    {
-		int	c, uc;
-		int	newl;
-		char_u	*s;
-
-		c = utf_ptr2char(p);
-		l = utf_ptr2len(p);
-		if (c == 0)
-		{
-		    // overlong sequence, use only the first byte
-		    c = *p;
-		    l = 1;
-		}
-		uc = utf_toupper(c);
-
-		// Reallocate string when byte count changes.  This is rare,
-		// thus it's OK to do another malloc()/free().
-		newl = utf_char2len(uc);
-		if (newl != l)
-		{
-		    s = alloc(STRLEN(res) + 1 + newl - l);
-		    if (s == NULL)
-		    {
-			vim_free(res);
-			return NULL;
-		    }
-		    mch_memmove(s, res, p - res);
-		    STRCPY(s + (p - res) + newl, p + l);
-		    p = s + (p - res);
-		    vim_free(res);
-		    res = s;
-		}
-
-		utf_char2bytes(uc, p);
-		p += newl;
-	    }
-	    else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
-		p += l;		// skip multi-byte character
-	    else
-	    {
-		*p = TOUPPER_LOC(*p); // note that toupper() can be a macro
-		p++;
-	    }
-	}
-
-    return res;
-}
-
-/*
- * Make string "s" all lower-case and return it in allocated memory.
- * Handles multi-byte characters as well as possible.
- * Returns NULL when out of memory.
- */
-    char_u *
-strlow_save(char_u *orig)
-{
-    char_u	*p;
-    char_u	*res;
-
-    res = p = vim_strsave(orig);
-
-    if (res != NULL)
-	while (*p != NUL)
-	{
-	    int		l;
-
-	    if (enc_utf8)
-	    {
-		int	c, lc;
-		int	newl;
-		char_u	*s;
-
-		c = utf_ptr2char(p);
-		l = utf_ptr2len(p);
-		if (c == 0)
-		{
-		    // overlong sequence, use only the first byte
-		    c = *p;
-		    l = 1;
-		}
-		lc = utf_tolower(c);
-
-		// Reallocate string when byte count changes.  This is rare,
-		// thus it's OK to do another malloc()/free().
-		newl = utf_char2len(lc);
-		if (newl != l)
-		{
-		    s = alloc(STRLEN(res) + 1 + newl - l);
-		    if (s == NULL)
-		    {
-			vim_free(res);
-			return NULL;
-		    }
-		    mch_memmove(s, res, p - res);
-		    STRCPY(s + (p - res) + newl, p + l);
-		    p = s + (p - res);
-		    vim_free(res);
-		    res = s;
-		}
-
-		utf_char2bytes(lc, p);
-		p += newl;
-	    }
-	    else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
-		p += l;		// skip multi-byte character
-	    else
-	    {
-		*p = TOLOWER_LOC(*p); // note that tolower() can be a macro
-		p++;
-	    }
-	}
-
-    return res;
-}
-#endif
-
-/*
- * delete spaces at the end of a string
- */
-    void
-del_trailing_spaces(char_u *ptr)
-{
-    char_u	*q;
-
-    q = ptr + STRLEN(ptr);
-    while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
-	*q = NUL;
-}
-
-/*
- * Like strncpy(), but always terminate the result with one NUL.
- * "to" must be "len + 1" long!
- */
-    void
-vim_strncpy(char_u *to, char_u *from, size_t len)
-{
-    STRNCPY(to, from, len);
-    to[len] = NUL;
-}
-
-/*
- * Like strcat(), but make sure the result fits in "tosize" bytes and is
- * always NUL terminated. "from" and "to" may overlap.
- */
-    void
-vim_strcat(char_u *to, char_u *from, size_t tosize)
-{
-    size_t tolen = STRLEN(to);
-    size_t fromlen = STRLEN(from);
-
-    if (tolen + fromlen + 1 > tosize)
-    {
-	mch_memmove(to + tolen, from, tosize - tolen - 1);
-	to[tosize - 1] = NUL;
-    }
-    else
-	mch_memmove(to + tolen, from, fromlen + 1);
-}
-
 /*
 * Isolate one part of a string option where parts are separated with
 * "sep_chars".
@@ -1848,180 +1353,6 @@ vim_memset(void *ptr, int c, size_t size)
 }
 #endif

-#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
-/*
- * Compare two strings, ignoring case, using current locale.
- * Doesn't work for multi-byte characters.
- * return 0 for match, < 0 for smaller, > 0 for bigger
- */
-    int
-vim_stricmp(char *s1, char *s2)
-{
-    int		i;
-
-    for (;;)
-    {
-	i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
-	if (i != 0)
-	    return i;			    // this character different
-	if (*s1 == NUL)
-	    break;			    // strings match until NUL
-	++s1;
-	++s2;
-    }
-    return 0;				    // strings match
-}
-#endif
-
-#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
-/*
- * Compare two strings, for length "len", ignoring case, using current locale.
- * Doesn't work for multi-byte characters.
- * return 0 for match, < 0 for smaller, > 0 for bigger
- */
-    int
-vim_strnicmp(char *s1, char *s2, size_t len)
-{
-    int		i;
-
-    while (len > 0)
-    {
-	i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
-	if (i != 0)
-	    return i;			    // this character different
-	if (*s1 == NUL)
-	    break;			    // strings match until NUL
-	++s1;
-	++s2;
-	--len;
-    }
-    return 0;				    // strings match
-}
-#endif
-
-/*
- * Search for first occurrence of "c" in "string".
- * Version of strchr() that handles unsigned char strings with characters from
- * 128 to 255 correctly.  It also doesn't return a pointer to the NUL at the
- * end of the string.
- */
-    char_u  *
-vim_strchr(char_u *string, int c)
-{
-    char_u	*p;
-    int		b;
-
-    p = string;
-    if (enc_utf8 && c >= 0x80)
-    {
-	while (*p != NUL)
-	{
-	    int l = utfc_ptr2len(p);
-
-	    // Avoid matching an illegal byte here.
-	    if (utf_ptr2char(p) == c && l > 1)
-		return p;
-	    p += l;
-	}
-	return NULL;
-    }
-    if (enc_dbcs != 0 && c > 255)
-    {
-	int	n2 = c & 0xff;
-
-	c = ((unsigned)c >> 8) & 0xff;
-	while ((b = *p) != NUL)
-	{
-	    if (b == c && p[1] == n2)
-		return p;
-	    p += (*mb_ptr2len)(p);
-	}
-	return NULL;
-    }
-    if (has_mbyte)
-    {
-	while ((b = *p) != NUL)
-	{
-	    if (b == c)
-		return p;
-	    p += (*mb_ptr2len)(p);
-	}
-	return NULL;
-    }
-    while ((b = *p) != NUL)
-    {
-	if (b == c)
-	    return p;
-	++p;
-    }
-    return NULL;
-}
-
-/*
- * Version of strchr() that only works for bytes and handles unsigned char
- * strings with characters above 128 correctly. It also doesn't return a
- * pointer to the NUL at the end of the string.
- */
-    char_u  *
-vim_strbyte(char_u *string, int c)
-{
-    char_u	*p = string;
-
-    while (*p != NUL)
-    {
-	if (*p == c)
-	    return p;
-	++p;
-    }
-    return NULL;
-}
-
-/*
- * Search for last occurrence of "c" in "string".
- * Version of strrchr() that handles unsigned char strings with characters from
- * 128 to 255 correctly.  It also doesn't return a pointer to the NUL at the
- * end of the string.
- * Return NULL if not found.
- * Does not handle multi-byte char for "c"!
- */
-    char_u  *
-vim_strrchr(char_u *string, int c)
-{
-    char_u	*retval = NULL;
-    char_u	*p = string;
-
-    while (*p)
-    {
-	if (*p == c)
-	    retval = p;
-	MB_PTR_ADV(p);
-    }
-    return retval;
-}
-
-/*
- * Vim's version of strpbrk(), in case it's missing.
- * Don't generate a prototype for this, causes problems when it's not used.
- */
-#ifndef PROTO
-# ifndef HAVE_STRPBRK
-#  ifdef vim_strpbrk
-#   undef vim_strpbrk
-#  endif
-    char_u *
-vim_strpbrk(char_u *s, char_u *charset)
-{
-    while (*s)
-    {
-	if (vim_strchr(charset, *s) != NULL)
-	    return s;
-	MB_PTR_ADV(s);
-    }
-    return NULL;
-}
-# endif
-#endif
-
 /*
 * Vim has its own isspace() function, because on some machines isspace()
 * can't handle characters above 128.
@@ -3974,25 +3305,6 @@ qsort(
 }
 #endif

-/*
- * Sort an array of strings.
- */
-static int sort_compare(const void *s1, const void *s2);
-
-    static int
-sort_compare(const void *s1, const void *s2)
-{
-    return STRCMP(*(char **)s1, *(char **)s2);
-}
-
-    void
-sort_strings(
-    char_u	**files,
-    int		count)
-{
-    qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
-}
-
 /*
 * The putenv() implementation below comes from the "screen" program.
 * Included with permission from Juergen Weigert.
@@ -4304,24 +3616,6 @@ put_bytes(FILE *fd, long_u nr, int len)

 #endif

-#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
-/*
- * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
- * When "s" is NULL FALSE is returned.
- */
-    int
-has_non_ascii(char_u *s)
-{
-    char_u	*p;
-
-    if (s != NULL)
-	for (p = s; *p != NUL; ++p)
-	    if (*p >= 128)
-		return TRUE;
-    return FALSE;
-}
-#endif
-
 #ifndef PROTO  // proto is defined in vim.h
 # ifdef ELAPSED_TIMEVAL
 /*