diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index 9ab97ecab4..32cb40f1b7 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -747,6 +747,8 @@ undofile({name}) String undo file name for {name} undotree([{buf}]) List undo file tree for buffer {buf} uniq({list} [, {func} [, {dict}]]) List remove adjacent duplicates from a list +uri_decode({string}) String URI-decode a string +uri_encode({string}) String URI-encode a string utf16idx({string}, {idx} [, {countcc} [, {charidx}]]) Number UTF-16 index of byte {idx} in {string} values({dict}) List values in {dict} @@ -12187,6 +12189,59 @@ uniq({list} [, {func} [, {dict}]]) *uniq()* *E882* Return type: list<{type}> +uri_decode({string}) *uri_decode()* + Returns the URI-decoded form of {string}, reversing + percent-encoding (converting sequences like "%3D" back to + the corresponding character). + + The decoding follows standard percent-decoding rules: + - "%HH" is replaced with the character for the hex value + HH. + - If the decoded bytes form valid UTF-8, they are combined + into the corresponding character(s). Otherwise, the + bytes are kept as-is. + - Invalid or incomplete encodings (e.g. "%GZ", "%3", or a + trailing "%") are left unchanged. + + Returns an empty String if {string} is empty. + + Example: > + :echo uri_decode('c%3A%5Cmy%5Cdir%5Cfoo%20bar') + c:\my\dir\foo bar + :echo uri_decode('%CE%B1%CE%B2%CE%B3') + αβγ +< + Can also be used as a |method|: > + mystr->uri_decode() +< + Return type: |String| + +uri_encode({string}) *uri_encode()* + Returns the URI-encoded form of {string}. URI encoding + replaces unsafe or reserved characters with percent-encoded + sequences. + + The encoding follows standard percent-encoding rules: + - Alphanumeric characters [0-9A-Za-z] remain unchanged. + - The characters "-", "_", ".", and "~" also remain + unchanged. + - All other characters are replaced with "%HH", where HH + is the two-digit uppercase hexadecimal value. 
+ - "%" itself is encoded too, so "%20" becomes "%2520". + + Returns an empty String if {string} is empty. + + Example: > + :echo uri_encode('c:\my\dir\foo bar') + c%3A%5Cmy%5Cdir%5Cfoo%20bar + :echo uri_encode('key=value&name=αβγ') + key%3Dvalue%26name%3D%CE%B1%CE%B2%CE%B3 +< + Can also be used as a |method|: > + mystr->uri_encode() +< + Return type: |String| + *utf16idx()* utf16idx({string}, {idx} [, {countcc} [, {charidx}]]) Same as |charidx()| but returns the UTF-16 code unit index of diff --git a/runtime/doc/tags b/runtime/doc/tags index d42cac486a..2046d6f933 100644 --- a/runtime/doc/tags +++ b/runtime/doc/tags @@ -11101,6 +11101,8 @@ unix os_unix.txt /*unix* unlisted-buffer windows.txt /*unlisted-buffer* up-down-motions motion.txt /*up-down-motions* uppercase change.txt /*uppercase* +uri_decode() builtin.txt /*uri_decode()* +uri_encode() builtin.txt /*uri_encode()* urxvt-mouse options.txt /*urxvt-mouse* use-visual-cmds version4.txt /*use-visual-cmds* useful-mappings tips.txt /*useful-mappings* diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt index ce05623b52..813211e7c8 100644 --- a/runtime/doc/usr_41.txt +++ b/runtime/doc/usr_41.txt @@ -807,6 +807,8 @@ String manipulation: *string-functions* str2blob() convert a list of strings into a blob blob2str() convert a blob into a list of strings items() get List of String index-character pairs + uri_encode() URI-encode a string + uri_decode() URI-decode a string List manipulation: *list-functions* get() get an item without error for wrong index diff --git a/runtime/doc/version9.txt b/runtime/doc/version9.txt index 6ea5257590..ce8832b259 100644 --- a/runtime/doc/version9.txt +++ b/runtime/doc/version9.txt @@ -41788,6 +41788,8 @@ Functions: ~ |str2blob()| convert a List of strings into a blob |test_null_tuple()| return a null tuple |tuple2list()| turn a Tuple of items into a List +|uri_decode()| URI-decode a string +|uri_encode()| URI-encode a string |wildtrigger()| trigger wildcard expansion diff --git
a/src/evalfunc.c b/src/evalfunc.c index 2dced8e3fc..91c7ca04e0 100644 --- a/src/evalfunc.c +++ b/src/evalfunc.c @@ -3116,6 +3116,10 @@ static funcentry_T global_functions[] = ret_dict_any, f_undotree}, {"uniq", 1, 3, FEARG_1, arg13_sortuniq, ret_first_arg, f_uniq}, + {"uri_decode", 1, 1, FEARG_1, arg1_string, + ret_string, f_uridecode}, + {"uri_encode", 1, 1, FEARG_1, arg1_string, + ret_string, f_uriencode}, {"utf16idx", 2, 4, FEARG_1, arg4_string_number_bool_bool, ret_number, f_utf16idx}, {"values", 1, 1, FEARG_1, arg1_dict_any, diff --git a/src/proto/strings.pro b/src/proto/strings.pro index 8e7dc180d3..aeb2e641e3 100644 --- a/src/proto/strings.pro +++ b/src/proto/strings.pro @@ -52,4 +52,6 @@ void f_tolower(typval_T *argvars, typval_T *rettv); void f_toupper(typval_T *argvars, typval_T *rettv); void f_tr(typval_T *argvars, typval_T *rettv); void f_trim(typval_T *argvars, typval_T *rettv); +void f_uridecode(typval_T *argvars, typval_T *rettv); +void f_uriencode(typval_T *argvars, typval_T *rettv); /* vim: set ft=c : */ diff --git a/src/strings.c b/src/strings.c index 87868b90c4..fbaf5dc27a 100644 --- a/src/strings.c +++ b/src/strings.c @@ -2310,6 +2310,151 @@ f_trim(typval_T *argvars, typval_T *rettv) rettv->vval.v_string = vim_strnsave(head, tail - head); } +/* + * Decodes a URI-encoded string. + * + * Parameters: + * str - The URI-encoded input string (%XX is decoded; '+' is kept as-is). + * + * Returns: + * A newly allocated string with URI encoding decoded: + * - %XX sequences are converted to the corresponding character. + * - Malformed input (e.g., incomplete % sequence) and "%00" (a NUL would + * end the string) are copied unchanged. + * The output string will never be longer than the input string. + * The caller is responsible for freeing the returned string. + * + * Returns NULL if input is NULL or memory allocation fails.
+ */ + static char_u * +uri_decode(char_u *str) +{ + if (str == NULL) + return NULL; + + size_t len = STRLEN(str); + + char_u *decoded = alloc(len + 1); + if (!decoded) + return NULL; + + char_u *p = decoded; + size_t i = 0; + + while (i < len) + { + if (str[i] == '%') + { + if (i + 2 >= len) + { + // Malformed encoding + *p++ = str[i++]; + if (str[i] != NUL) + *p++ = str[i++]; + } + else + { + int val = hexhex2nr(&str[i + 1]); + if (val > 0) + { + *p++ = (char_u)val; + i += 3; + } + else + { + // invalid hex digits, or "%00" which cannot be stored + for (int j = 0; j < 3; j++) + *p++ = str[i++]; + } + } + + } + else + *p++ = str[i++]; + } + + *p = NUL; + + return decoded; +} + +/* + * "uri_decode({str})" function + */ + void +f_uridecode(typval_T *argvars, typval_T *rettv) +{ + rettv->v_type = VAR_STRING; + rettv->vval.v_string = NULL; + + if (check_for_string_arg(argvars, 0) == FAIL) + return; + + rettv->vval.v_string = uri_decode(tv_get_string(&argvars[0])); +} + +/* + * Encodes a string for safe use in a URI. + * + * Parameters: + * str - The input string to encode. + * + * Returns: + * A newly allocated string where: + * - Alphanumeric characters and '-', '_', '.', '~' are left unchanged. + * - All other bytes are encoded as %XX (uppercase hex). + * The caller is responsible for freeing the returned string. + * + * Returns NULL if input is NULL or memory allocation fails. + */ + static char_u * +uri_encode(char_u *str) +{ + if (str == NULL) + return NULL; + + size_t len = STRLEN(str); + + // Worst case: every character needs encoding => 3x size + 1 for null + // terminator + char_u *encoded = alloc(len * 3 + 1); + if (encoded == NULL) + return NULL; + + char_u *p = encoded; + + for (size_t i = 0; i < len; ++i) + { + char_u c = str[i]; + if (ASCII_ISALNUM(c) || c == '-' || c == '_' || c == '.'
|| c == '~') + *p++ = c; + else + { + sprintf((char *)p, "%%%02X", c); + p += 3; + } + } + + *p = NUL; + + return encoded; +} + +/* + * "uri_encode({str})" function + */ + void +f_uriencode(typval_T *argvars, typval_T *rettv) +{ + rettv->v_type = VAR_STRING; + rettv->vval.v_string = NULL; + + if (check_for_string_arg(argvars, 0) == FAIL) + return; + + rettv->vval.v_string = uri_encode(tv_get_string(&argvars[0])); +} + static char *e_printf = N_(e_insufficient_arguments_for_printf); /* diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim index ca54d37291..9dd9601810 100644 --- a/src/testdir/test_functions.vim +++ b/src/testdir/test_functions.vim @@ -4503,4 +4503,62 @@ func Test_blob2str() call v9.CheckLegacyAndVim9Success(lines) endfunc +" Test for uri_encode() and uri_decode() functions +func Test_uriencoding() + let lines =<< trim END + #" uri encoding + call assert_equal('a1%20b2', uri_encode('a1 b2')) + call assert_equal('-%3F%26%2F%23%2B%3D%3A%5B%5D%40-', uri_encode('-?&/#+=:[]@-')) + call assert_equal('%22%3C%3E%5E%60%7B%7C%7D', uri_encode('"<>^`{|}')) + call assert_equal('%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5', 'αβγδε'->uri_encode()) + call assert_equal('r%C3%A9sum%C3%A9', uri_encode('résumé')) + call assert_equal('%E4%BD%A0%E5%A5%BD', uri_encode('你好')) + call assert_equal('%F0%9F%98%8A%F0%9F%98%8A', uri_encode('😊😊')) + call assert_equal('-_.~', uri_encode('-_.~')) + call assert_equal('', uri_encode('')) + call assert_equal('%2520%2523', uri_encode('%20%23')) + call assert_equal('', uri_encode(test_null_string())) + call assert_equal('a', uri_encode('a')) + call assert_equal('%20', uri_encode(' ')) + call assert_equal('%CE%B1', uri_encode('α')) + call assert_equal('c%3A%5Cmy%5Cdir%5Ca%20b%20c', uri_encode('c:\my\dir\a b c')) + call assert_fails('call uri_encode([])', 'E1174: String required for argument 1') + + #" uri decoding + call assert_equal('a1 b2', uri_decode('a1%20b2')) + call assert_equal('-?&/#+=:[]@-',
uri_decode('-%3F%26%2F%23%2B%3D%3A%5B%5D%40-')) + call assert_equal('"<>^`{|}', uri_decode('%22%3C%3E%5E%60%7B%7C%7D')) + call assert_equal('αβγδε', '%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5'->uri_decode()) + call assert_equal('résumé', uri_decode('r%C3%A9sum%C3%A9')) + call assert_equal('你好', uri_decode('%E4%BD%A0%E5%A5%BD')) + call assert_equal('😊😊', uri_decode('%F0%9F%98%8A%F0%9F%98%8A')) + call assert_equal('a+b', uri_decode('a+b')) + call assert_equal('-_.~', uri_decode('-_.~')) + call assert_equal('', uri_decode('')) + call assert_equal('%20%23', uri_decode('%2520%2523')) + call assert_equal('', uri_decode(test_null_string())) + call assert_equal('a', uri_decode('a')) + call assert_equal(' ', uri_decode('%20')) + call assert_equal('α', uri_decode('%CE%B1')) + call assert_equal('c:\my\dir\a b c', uri_decode('c%3A%5Cmy%5Cdir%5Ca%20b%20c')) + call assert_equal('%', uri_decode('%')) + call assert_equal('%3', uri_decode('%3')) + call assert_equal(';', uri_decode('%3b')) + call assert_equal('a%xyb', uri_decode('a%xyb')) + call assert_equal('a%00b', uri_decode('a%00b')) + call assert_fails('call uri_decode([])', 'E1174: String required for argument 1') + + #" control characters + VAR cstr = "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10" + LET cstr ..= "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F" + VAR expected = '' + for i in range(1, 31) + LET expected ..= printf("%%%02X", i) + endfor + call assert_equal(expected, uri_encode(cstr)) + call assert_equal(cstr, uri_decode(expected)) + END + call v9.CheckLegacyAndVim9Success(lines) +endfunc + " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/version.c b/src/version.c index aa3142c5bf..6fe0936c71 100644 --- a/src/version.c +++ b/src/version.c @@ -724,6 +724,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1669, /**/ 1668, /**/