patch 9.1.1669: Vim script: no support for URI de-/encoding

Problem: Vim script: no support for URI de-/encoding (ubaldot) Solution: Add the uri_encode() and uri_decode() functions (Yegappan Lakshmanan) fixes: #17861 closes: #18034 Signed-off-by: Yegappan Lakshmanan <yegappan@yahoo.com> Signed-off-by: Christian Brabandt <cb@256bit.org>
2025-10-18 07:54:29 -04:00 · 2025-08-23 06:26:16 -04:00
parent da34f84847
commit 454c7ea484
9 changed files with 271 additions and 0 deletions
--- a/runtime/doc/builtin.txt
+++ b/runtime/doc/builtin.txt
@@ -747,6 +747,8 @@ undofile({name})		String	undo file name for {name}
 undotree([{buf}])		List	undo file tree for buffer {buf}
 uniq({list} [, {func} [, {dict}]])
 				List	remove adjacent duplicates from a list
 uri_decode({string})		String	URI-decode a string
 uri_encode({string})		String	URI-encode a string
 utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
 				Number	UTF-16 index of byte {idx} in {string}
 values({dict})			List	values in {dict}
@@ -12187,6 +12189,59 @@ uniq({list} [, {func} [, {dict}]])			*uniq()* *E882*
 		Return type: list<{type}>
 uri_decode({string})					*uri_decode()*
 		Returns the URI-decoded form of {string}, reversing
 		percent-encoding (converting sequences like "%3D" back to
 		the corresponding character).
 		The decoding follows standard percent-decoding rules:
 		    - "%HH" is replaced with the character for the hex value
 		      HH.
 		    - If the decoded bytes form valid UTF-8, they are combined
 		      into the corresponding character(s).  Otherwise, the
 		      bytes are kept as-is.
 		    - Invalid or incomplete encodings (e.g. "%GZ", "%3", or a
 		      trailing "%") are left unchanged.
 		Returns an empty String if {string} is empty.
 		Example: >
 			:echo uri_decode('c%3A%5Cmy%5Cdir%5Cfoo%20bar')
 			c:\my\dir\foo bar
 			:echo uri_decode('%CE%B1%CE%B2%CE%B3')
 			αβγ
 <
 		Can also be used as a |method|: >
 			mystr->uri_decode()
 <
 		Return type: |String|
 uri_encode({string})					*uri_encode()*
 		Returns the URI-encoded form of {string}.  URI encoding
 		replaces unsafe or reserved characters with percent-encoded
 		sequences.
 		The encoding follows standard percent-encoding rules:
                    - Alphanumeric characters [0-9A-Za-z] remain unchanged.
                    - The characters "-", "_", ".", and "~" also remain
                      unchanged.
                    - All other characters are replaced with "%HH", where HH
                      is the two-digit uppercase hexadecimal value.
                    - A "%" in {string} is itself encoded as "%25", so text
                      that is already percent-encoded is encoded again
                      (e.g. "%20" becomes "%2520").
 		Returns an empty String if {string} is empty.
 		Example: >
 			:echo uri_encode('c:\my\dir\foo bar')
 			c%3A%5Cmy%5Cdir%5Cfoo%20bar
 			:echo uri_encode('key=value&name=αβγ')
 			key%3Dvalue%26name%3D%CE%B1%CE%B2%CE%B3
 <
 		Can also be used as a |method|: >
 			mystr->uri_encode()
 <
 		Return type: |String|
 							*utf16idx()*
 utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
 		Same as |charidx()| but returns the UTF-16 code unit index of
--- a/runtime/doc/tags
+++ b/runtime/doc/tags
@@ -11101,6 +11101,8 @@ unix	os_unix.txt	/*unix*
 unlisted-buffer	windows.txt	/*unlisted-buffer*
 up-down-motions	motion.txt	/*up-down-motions*
 uppercase	change.txt	/*uppercase*
 uri_decode()	builtin.txt	/*uri_decode()*
 uri_encode()	builtin.txt	/*uri_encode()*
 urxvt-mouse	options.txt	/*urxvt-mouse*
 use-visual-cmds	version4.txt	/*use-visual-cmds*
 useful-mappings	tips.txt	/*useful-mappings*
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -807,6 +807,8 @@ String manipulation:					*string-functions*
 	str2blob()		convert a list of strings into a blob
 	blob2str()		convert a blob into a list of strings
 	items()			get List of String index-character pairs
 	uri_encode()		URI-encode a string
 	uri_decode()		URI-decode a string
 List manipulation:					*list-functions*
 	get()			get an item without error for wrong index
--- a/runtime/doc/version9.txt
+++ b/runtime/doc/version9.txt
@@ -41788,6 +41788,8 @@ Functions: ~
 |str2blob()|		convert a List of strings into a blob
 |test_null_tuple()|	return a null tuple
 |tuple2list()|		turn a Tuple of items into a List
 |uri_decode()|		URI-decode a string
 |uri_encode()|		URI-encode a string
 |wildtrigger()|		trigger wildcard expansion
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -3116,6 +3116,10 @@ static funcentry_T global_functions[] =
 			ret_dict_any,	    f_undotree},
    {"uniq",		1, 3, FEARG_1,	    arg13_sortuniq,
 			ret_first_arg,	    f_uniq},
    {"uri_decode",	1, 1, FEARG_1,	    arg1_string,
 			ret_string,	    f_uridecode},
    {"uri_encode",	1, 1, FEARG_1,	    arg1_string,
 			ret_string,	    f_uriencode},
    {"utf16idx",	2, 4, FEARG_1,	    arg4_string_number_bool_bool,
 			ret_number,	    f_utf16idx},
    {"values",		1, 1, FEARG_1,	    arg1_dict_any,
--- a/src/proto/strings.pro
+++ b/src/proto/strings.pro
@@ -52,4 +52,6 @@ void f_tolower(typval_T *argvars, typval_T *rettv);
 void f_toupper(typval_T *argvars, typval_T *rettv);
 void f_tr(typval_T *argvars, typval_T *rettv);
 void f_trim(typval_T *argvars, typval_T *rettv);
 void f_uridecode(typval_T *argvars, typval_T *rettv);
 void f_uriencode(typval_T *argvars, typval_T *rettv);
 /* vim: set ft=c : */
--- a/src/strings.c
+++ b/src/strings.c
@@ -2310,6 +2310,151 @@ f_trim(typval_T *argvars, typval_T *rettv)
    rettv->vval.v_string = vim_strnsave(head, tail - head);
 }
 /*
 * Decode the percent-encoded (URI-encoded) string "str".
 *
 * Every "%HH" sequence, where HH are two hexadecimal digits, is replaced by
 * the byte with that value.  All other bytes, including '+', are copied
 * unchanged.
 *
 * A '%' that does not start a decodable escape is copied literally and
 * scanning resumes at the very next byte.  This way a valid escape that
 * directly follows a stray '%' is still decoded: "%%41" becomes "%A" and
 * "%2%20" becomes "%2 ".  A '%' is not decodable when:
 *   - fewer than two bytes follow it (e.g. "%" or "%3" at the end),
 *   - the two following bytes are not both hex digits (e.g. "%GZ"),
 *   - the escape is "%00": a NUL byte would terminate the result early and
 *     silently drop the rest of the text, so it is left encoded.
 *
 * The result is never longer than the input.
 *
 * Returns the decoded string in allocated memory, to be freed by the caller.
 * Returns NULL when "str" is NULL or when out of memory.
 */
    static char_u *
 uri_decode(char_u *str)
 {
    if (str == NULL)
 	return NULL;

    size_t len = STRLEN(str);
    // Decoding only ever shrinks the text, "len" bytes plus NUL is enough.
    char_u *decoded = alloc(len + 1);
    if (decoded == NULL)
 	return NULL;

    char_u	*p = decoded;
    size_t	i = 0;
    while (i < len)
    {
 	// A complete escape needs two more bytes after the '%'.
 	if (str[i] == '%' && len - i >= 3)
 	{
 	    // hexhex2nr() gives -1 unless both bytes are hex digits.
 	    int val = hexhex2nr(&str[i + 1]);

 	    // Zero is rejected as well: a decoded NUL would truncate the
 	    // resulting string.
 	    if (val > 0)
 	    {
 		*p++ = (char_u)val;
 		i += 3;
 		continue;
 	    }
 	}

 	// Ordinary byte, or a '%' that does not start a decodable escape.
 	// Only this single byte is consumed, so the bytes after a stray '%'
 	// are examined again and may start an escape of their own.
 	*p++ = str[i++];
    }
    *p = NUL;

    return decoded;
 }
 /*
 * "uri_decode({str})" function
 *
 * The result is a String.  It stays NULL when the argument is not a String;
 * otherwise it is the percent-decoded copy produced by uri_decode().
 */
    void
 f_uridecode(typval_T *argvars, typval_T *rettv)
 {
    // Set up the NULL string result first, it is what gets returned when
    // the argument check fails.
    rettv->vval.v_string = NULL;
    rettv->v_type = VAR_STRING;

    if (check_for_string_arg(argvars, 0) != FAIL)
 	rettv->vval.v_string = uri_decode(tv_get_string(&argvars[0]));
 }
 /*
 * Percent-encode "str" so that it can be used safely in a URI.
 *
 * ASCII letters and digits and the characters '-', '_', '.' and '~' are
 * copied as-is.  Every other byte is written as '%' followed by the byte
 * value as two uppercase hexadecimal digits.  This includes '%' itself, so
 * text that is already percent-encoded gets encoded a second time.
 *
 * Returns the encoded string in allocated memory, to be freed by the caller.
 * Returns NULL when "str" is NULL or when out of memory.
 */
    static char_u *
 uri_encode(char_u *str)
 {
    static const char hexdigits[] = "0123456789ABCDEF";

    if (str == NULL)
 	return NULL;

    size_t srclen = STRLEN(str);

    // An input byte expands to at most three output bytes ("%HH"); one more
    // byte is needed for the terminating NUL.
    char_u *result = alloc(srclen * 3 + 1);
    if (result == NULL)
 	return NULL;

    char_u *dst = result;
    for (char_u *src = str; src < str + srclen; ++src)
    {
 	char_u c = *src;
 	int unreserved = ASCII_ISALNUM(c) || c == '-' || c == '_'
 						    || c == '.' || c == '~';

 	if (!unreserved)
 	{
 	    // High nibble first, then low nibble.
 	    *dst++ = '%';
 	    *dst++ = (char_u)hexdigits[c >> 4];
 	    *dst++ = (char_u)hexdigits[c & 0x0F];
 	}
 	else
 	    *dst++ = c;
    }
    *dst = NUL;

    return result;
 }
 /*
 * "uri_encode({str})" function
 *
 * The result is a String.  It stays NULL when the argument is not a String;
 * otherwise it is the percent-encoded copy produced by uri_encode().
 */
    void
 f_uriencode(typval_T *argvars, typval_T *rettv)
 {
    // Set up the NULL string result first, it is what gets returned when
    // the argument check fails.
    rettv->vval.v_string = NULL;
    rettv->v_type = VAR_STRING;

    if (check_for_string_arg(argvars, 0) != FAIL)
 	rettv->vval.v_string = uri_encode(tv_get_string(&argvars[0]));
 }
 static char *e_printf = N_(e_insufficient_arguments_for_printf);
 /*
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -4503,4 +4503,61 @@ func Test_blob2str()
  call v9.CheckLegacyAndVim9Success(lines)
 endfunc
 " Test for uri_encode() and uri_decode() functions
 func Test_uriencoding()
  " The script lines below are handed to v9.CheckLegacyAndVim9Success();
  " NOTE(review): presumably that helper runs them both as legacy script and
  " as Vim9 script and translates the '#"', VAR and LET markers - confirm.
  "
  " Encoding cases: space and reserved punctuation, multibyte UTF-8 text
  " (Greek, accented Latin, CJK, emoji), the unreserved characters "-_.~",
  " empty and null strings, input that is already percent-encoded (the "%"
  " is encoded again as "%25") and a non-String argument (E1174).
  "
  " Decoding cases: the inverse of the above, plus "+" being left alone,
  " lowercase hex digits ("%3b"), and malformed input ("%", "%3", "%xy")
  " being returned unchanged.
  "
  " Finally the control characters 0x01 - 0x1F are checked in both
  " directions.
  let lines =<< trim END
    #" uri encoding
    call assert_equal('a1%20b2', uri_encode('a1 b2'))
    call assert_equal('-%3F%26%2F%23%2B%3D%3A%5B%5D%40-', uri_encode('-?&/#+=:[]@-'))
    call assert_equal('%22%3C%3E%5E%60%7B%7C%7D', uri_encode('"<>^`{|}'))
    call assert_equal('%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5', 'αβγδε'->uri_encode())
    call assert_equal('r%C3%A9sum%C3%A9', uri_encode('résumé'))
    call assert_equal('%E4%BD%A0%E5%A5%BD', uri_encode('你好'))
    call assert_equal('%F0%9F%98%8A%F0%9F%98%8A', uri_encode('😊😊'))
    call assert_equal('-_.~', uri_encode('-_.~'))
    call assert_equal('', uri_encode(''))
    call assert_equal('%2520%2523', uri_encode('%20%23'))
    call assert_equal('', uri_encode(test_null_string()))
    call assert_equal('a', uri_encode('a'))
    call assert_equal('%20', uri_encode(' '))
    call assert_equal('%CE%B1', uri_encode('α'))
    call assert_equal('c%3A%5Cmy%5Cdir%5Ca%20b%20c', uri_encode('c:\my\dir\a b c'))
    call assert_fails('call uri_encode([])', 'E1174: String required for argument 1')
    #" uri decoding
    call assert_equal('a1 b2', uri_decode('a1%20b2'))
    call assert_equal('-?&/#+=:[]@-', uri_decode('-%3F%26%2F%23%2B%3D%3A%5B%5D%40-'))
    call assert_equal('"<>^`{|}', uri_decode('%22%3C%3E%5E%60%7B%7C%7D'))
    call assert_equal('αβγδε', '%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5'->uri_decode())
    call assert_equal('résumé', uri_decode('r%C3%A9sum%C3%A9'))
    call assert_equal('你好', uri_decode('%E4%BD%A0%E5%A5%BD'))
    call assert_equal('😊😊', uri_decode('%F0%9F%98%8A%F0%9F%98%8A'))
    call assert_equal('a+b', uri_decode('a+b'))
    call assert_equal('-_.~', uri_decode('-_.~'))
    call assert_equal('', uri_decode(''))
    call assert_equal('%20%23', uri_decode('%2520%2523'))
    call assert_equal('', uri_decode(test_null_string()))
    call assert_equal('a', uri_decode('a'))
    call assert_equal(' ', uri_decode('%20'))
    call assert_equal('α', uri_decode('%CE%B1'))
    call assert_equal('c:\my\dir\a b c', uri_decode('c%3A%5Cmy%5Cdir%5Ca%20b%20c'))
    call assert_equal('%', uri_decode('%'))
    call assert_equal('%3', uri_decode('%3'))
    call assert_equal(';', uri_decode('%3b'))
    call assert_equal('a%xyb', uri_decode('a%xyb'))
    call assert_fails('call uri_decode([])', 'E1174: String required for argument 1')
    #" control characters
    VAR cstr = "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10"
    LET cstr ..= "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
    VAR expected = ''
    for i in range(1, 31)
      LET expected ..= printf("%%%02X", i)
    endfor
    call assert_equal(expected, uri_encode(cstr))
    call assert_equal(cstr, uri_decode(expected))
  END
  call v9.CheckLegacyAndVim9Success(lines)
 endfunc
 " vim: shiftwidth=2 sts=2 expandtab
--- a/src/version.c
+++ b/src/version.c
@@ -724,6 +724,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
    1669,
 /**/
    1668,
 /**/