mirror of
https://github.com/vim/vim.git
synced 2025-10-18 07:54:29 -04:00
patch 9.1.1669: Vim script: no support for URI de-/encoding
Problem: Vim script: no support for URI de-/encoding (ubaldot) Solution: Add the uri_encode() and uri_decode() functions (Yegappan Lakshmanan) fixes: #17861 closes: #18034 Signed-off-by: Yegappan Lakshmanan <yegappan@yahoo.com> Signed-off-by: Christian Brabandt <cb@256bit.org>
This commit is contained in:
committed by
Christian Brabandt
parent
da34f84847
commit
454c7ea484
@@ -747,6 +747,8 @@ undofile({name}) String undo file name for {name}
|
|||||||
undotree([{buf}]) List undo file tree for buffer {buf}
|
undotree([{buf}]) List undo file tree for buffer {buf}
|
||||||
uniq({list} [, {func} [, {dict}]])
|
uniq({list} [, {func} [, {dict}]])
|
||||||
List remove adjacent duplicates from a list
|
List remove adjacent duplicates from a list
|
||||||
|
uri_decode({string}) String URI-decode a string
|
||||||
|
uri_encode({string}) String URI-encode a string
|
||||||
utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
|
utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
|
||||||
Number UTF-16 index of byte {idx} in {string}
|
Number UTF-16 index of byte {idx} in {string}
|
||||||
values({dict}) List values in {dict}
|
values({dict}) List values in {dict}
|
||||||
@@ -12187,6 +12189,59 @@ uniq({list} [, {func} [, {dict}]]) *uniq()* *E882*
|
|||||||
Return type: list<{type}>
|
Return type: list<{type}>
|
||||||
|
|
||||||
|
|
||||||
|
uri_decode({string}) *uri_decode()*
|
||||||
|
Returns the URI-decoded form of {string}, reversing
|
||||||
|
percent-encoding (converting sequences like "%3D" back to
|
||||||
|
the corresponding character).
|
||||||
|
|
||||||
|
The decoding follows standard percent-decoding rules:
|
||||||
|
- "%HH" is replaced with the character for the hex value
|
||||||
|
HH.
|
||||||
|
- If the decoded bytes form valid UTF-8, they are combined
|
||||||
|
into the corresponding character(s). Otherwise, the
|
||||||
|
bytes are kept as-is.
|
||||||
|
- Invalid or incomplete encodings (e.g. "%GZ", "%3", or a
|
||||||
|
trailing "%") are left unchanged.
|
||||||
|
|
||||||
|
Returns an empty String if {string} is empty.
|
||||||
|
|
||||||
|
Example: >
|
||||||
|
:echo uri_decode('c%3A%5Cmy%5Cdir%5Cfoo%20bar')
|
||||||
|
c:\my\dir\foo bar
|
||||||
|
:echo uri_decode('%CE%B1%CE%B2%CE%B3')
|
||||||
|
αβγ
|
||||||
|
<
|
||||||
|
Can also be used as a |method|: >
|
||||||
|
mystr->uri_decode()
|
||||||
|
<
|
||||||
|
Return type: |String|
|
||||||
|
|
||||||
|
uri_encode({string}) *uri_encode()*
|
||||||
|
Returns the URI-encoded form of {string}. URI encoding
|
||||||
|
replaces unsafe or reserved characters with percent-encoded
|
||||||
|
sequences.
|
||||||
|
|
||||||
|
The encoding follows standard percent-encoding rules:
|
||||||
|
- Alphanumeric characters [0-9A-Za-z] remain unchanged.
|
||||||
|
- The characters "-", "_", ".", and "~" also remain
|
||||||
|
unchanged.
|
||||||
|
- All other characters are replaced with "%HH", where HH
|
||||||
|
is the two-digit uppercase hexadecimal value.
|
||||||
|
- Existing percent-encoded sequences are not recognized: the
  "%" itself is encoded as "%25", so "%20" becomes "%2520".
|
||||||
|
|
||||||
|
Returns an empty String if {string} is empty.
|
||||||
|
|
||||||
|
Example: >
|
||||||
|
:echo uri_encode('c:\my\dir\foo bar')
|
||||||
|
c%3A%5Cmy%5Cdir%5Cfoo%20bar
|
||||||
|
:echo uri_encode('key=value&name=αβγ')
|
||||||
|
key%3Dvalue%26name%3D%CE%B1%CE%B2%CE%B3
|
||||||
|
<
|
||||||
|
Can also be used as a |method|: >
|
||||||
|
mystr->uri_encode()
|
||||||
|
<
|
||||||
|
Return type: |String|
|
||||||
|
|
||||||
*utf16idx()*
|
*utf16idx()*
|
||||||
utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
|
utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
|
||||||
Same as |charidx()| but returns the UTF-16 code unit index of
|
Same as |charidx()| but returns the UTF-16 code unit index of
|
||||||
|
@@ -11101,6 +11101,8 @@ unix os_unix.txt /*unix*
|
|||||||
unlisted-buffer windows.txt /*unlisted-buffer*
|
unlisted-buffer windows.txt /*unlisted-buffer*
|
||||||
up-down-motions motion.txt /*up-down-motions*
|
up-down-motions motion.txt /*up-down-motions*
|
||||||
uppercase change.txt /*uppercase*
|
uppercase change.txt /*uppercase*
|
||||||
|
uri_decode() builtin.txt /*uri_decode()*
|
||||||
|
uri_encode() builtin.txt /*uri_encode()*
|
||||||
urxvt-mouse options.txt /*urxvt-mouse*
|
urxvt-mouse options.txt /*urxvt-mouse*
|
||||||
use-visual-cmds version4.txt /*use-visual-cmds*
|
use-visual-cmds version4.txt /*use-visual-cmds*
|
||||||
useful-mappings tips.txt /*useful-mappings*
|
useful-mappings tips.txt /*useful-mappings*
|
||||||
|
@@ -807,6 +807,8 @@ String manipulation: *string-functions*
|
|||||||
str2blob() convert a list of strings into a blob
|
str2blob() convert a list of strings into a blob
|
||||||
blob2str() convert a blob into a list of strings
|
blob2str() convert a blob into a list of strings
|
||||||
items() get List of String index-character pairs
|
items() get List of String index-character pairs
|
||||||
|
uri_encode() URI-encode a string
|
||||||
|
uri_decode() URI-decode a string
|
||||||
|
|
||||||
List manipulation: *list-functions*
|
List manipulation: *list-functions*
|
||||||
get() get an item without error for wrong index
|
get() get an item without error for wrong index
|
||||||
|
@@ -41788,6 +41788,8 @@ Functions: ~
|
|||||||
|str2blob()| convert a List of strings into a blob
|
|str2blob()| convert a List of strings into a blob
|
||||||
|test_null_tuple()| return a null tuple
|
|test_null_tuple()| return a null tuple
|
||||||
|tuple2list()| turn a Tuple of items into a List
|
|tuple2list()| turn a Tuple of items into a List
|
||||||
|
|uri_decode()| URI-decode a string
|
||||||
|
|uri_encode()| URI-encode a string
|
||||||
|wildtrigger()| trigger wildcard expansion
|
|wildtrigger()| trigger wildcard expansion
|
||||||
|
|
||||||
|
|
||||||
|
@@ -3116,6 +3116,10 @@ static funcentry_T global_functions[] =
|
|||||||
ret_dict_any, f_undotree},
|
ret_dict_any, f_undotree},
|
||||||
{"uniq", 1, 3, FEARG_1, arg13_sortuniq,
|
{"uniq", 1, 3, FEARG_1, arg13_sortuniq,
|
||||||
ret_first_arg, f_uniq},
|
ret_first_arg, f_uniq},
|
||||||
|
{"uri_decode", 1, 1, FEARG_1, arg1_string,
|
||||||
|
ret_string, f_uridecode},
|
||||||
|
{"uri_encode", 1, 1, FEARG_1, arg1_string,
|
||||||
|
ret_string, f_uriencode},
|
||||||
{"utf16idx", 2, 4, FEARG_1, arg4_string_number_bool_bool,
|
{"utf16idx", 2, 4, FEARG_1, arg4_string_number_bool_bool,
|
||||||
ret_number, f_utf16idx},
|
ret_number, f_utf16idx},
|
||||||
{"values", 1, 1, FEARG_1, arg1_dict_any,
|
{"values", 1, 1, FEARG_1, arg1_dict_any,
|
||||||
|
@@ -52,4 +52,6 @@ void f_tolower(typval_T *argvars, typval_T *rettv);
|
|||||||
void f_toupper(typval_T *argvars, typval_T *rettv);
|
void f_toupper(typval_T *argvars, typval_T *rettv);
|
||||||
void f_tr(typval_T *argvars, typval_T *rettv);
|
void f_tr(typval_T *argvars, typval_T *rettv);
|
||||||
void f_trim(typval_T *argvars, typval_T *rettv);
|
void f_trim(typval_T *argvars, typval_T *rettv);
|
||||||
|
void f_uridecode(typval_T *argvars, typval_T *rettv);
|
||||||
|
void f_uriencode(typval_T *argvars, typval_T *rettv);
|
||||||
/* vim: set ft=c : */
|
/* vim: set ft=c : */
|
||||||
|
145
src/strings.c
145
src/strings.c
@@ -2310,6 +2310,151 @@ f_trim(typval_T *argvars, typval_T *rettv)
|
|||||||
rettv->vval.v_string = vim_strnsave(head, tail - head);
|
rettv->vval.v_string = vim_strnsave(head, tail - head);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Decodes a URI-encoded string.
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* str - The URI-encoded input string (may contain %XX sequences and '+').
|
||||||
|
*
|
||||||
|
* Returns:
|
||||||
|
* A newly allocated string with URI encoding decoded:
|
||||||
|
* - %XX sequences are converted to the corresponding character.
|
||||||
|
* - If the input is malformed (e.g., incomplete % sequence), the original
|
||||||
|
* characters are copied.
|
||||||
|
* The output string will never be longer than the input string.
|
||||||
|
* The caller is responsible for freeing the returned string.
|
||||||
|
*
|
||||||
|
* Returns NULL if input is NULL or memory allocation fails.
|
||||||
|
*/
|
||||||
|
static char_u *
|
||||||
|
uri_decode(char_u *str)
|
||||||
|
{
|
||||||
|
if (str == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
size_t len = STRLEN(str);
|
||||||
|
|
||||||
|
char_u *decoded = alloc(len + 1);
|
||||||
|
if (!decoded)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
char_u *p = decoded;
|
||||||
|
size_t i = 0;
|
||||||
|
|
||||||
|
while (i < len)
|
||||||
|
{
|
||||||
|
if (str[i] == '%')
|
||||||
|
{
|
||||||
|
if (i + 2 >= len)
|
||||||
|
{
|
||||||
|
// Malformed encoding
|
||||||
|
*p++ = str[i++];
|
||||||
|
if (str[i] != NUL)
|
||||||
|
*p++ = str[i++];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int val = hexhex2nr(&str[i + 1]);
|
||||||
|
if (val != -1)
|
||||||
|
{
|
||||||
|
*p++ = (char_u)val;
|
||||||
|
i += 3;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// invalid hex digits following "%"
|
||||||
|
for (int j = 0; j < 3; j++)
|
||||||
|
*p++ = str[i++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
else
|
||||||
|
*p++ = str[i++];
|
||||||
|
}
|
||||||
|
|
||||||
|
*p = NUL;
|
||||||
|
|
||||||
|
return decoded;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* "uri_decode({str})" function
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
f_uridecode(typval_T *argvars, typval_T *rettv)
|
||||||
|
{
|
||||||
|
rettv->v_type = VAR_STRING;
|
||||||
|
rettv->vval.v_string = NULL;
|
||||||
|
|
||||||
|
if (check_for_string_arg(argvars, 0) == FAIL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
rettv->vval.v_string = uri_decode(tv_get_string(&argvars[0]));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Encodes a string for safe use in a URI.
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* str - The input string to encode.
|
||||||
|
*
|
||||||
|
* Returns:
|
||||||
|
* A newly allocated string where:
|
||||||
|
* - Alphanumeric characters and '-', '_', '.', '~' are left unchanged.
|
||||||
|
* - All other bytes are encoded as %XX (uppercase hex).
|
||||||
|
* The caller is responsible for freeing the returned string.
|
||||||
|
*
|
||||||
|
* Returns NULL if input is NULL or memory allocation fails.
|
||||||
|
*/
|
||||||
|
static char_u *
|
||||||
|
uri_encode(char_u *str)
|
||||||
|
{
|
||||||
|
if (str == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
size_t len = STRLEN(str);
|
||||||
|
|
||||||
|
// Worst case: every character needs encoding => 3x size + 1 for null
|
||||||
|
// terminator
|
||||||
|
char_u *encoded = alloc(len * 3 + 1);
|
||||||
|
if (encoded == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
char_u *p = encoded;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < len; ++i)
|
||||||
|
{
|
||||||
|
char_u c = str[i];
|
||||||
|
if (ASCII_ISALNUM(c) || c == '-' || c == '_' || c == '.' || c == '~')
|
||||||
|
*p++ = c;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sprintf((char *)p, "%%%02X", c);
|
||||||
|
p += 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*p = NUL;
|
||||||
|
|
||||||
|
return encoded;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* "uri_encode({str})" function
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
f_uriencode(typval_T *argvars, typval_T *rettv)
|
||||||
|
{
|
||||||
|
rettv->v_type = VAR_STRING;
|
||||||
|
rettv->vval.v_string = NULL;
|
||||||
|
|
||||||
|
if (check_for_string_arg(argvars, 0) == FAIL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
rettv->vval.v_string = uri_encode(tv_get_string(&argvars[0]));
|
||||||
|
}
|
||||||
|
|
||||||
static char *e_printf = N_(e_insufficient_arguments_for_printf);
|
static char *e_printf = N_(e_insufficient_arguments_for_printf);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@@ -4503,4 +4503,61 @@ func Test_blob2str()
|
|||||||
call v9.CheckLegacyAndVim9Success(lines)
|
call v9.CheckLegacyAndVim9Success(lines)
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
" Test for uri_encode() and uri_decode() functions
|
||||||
|
func Test_uriencoding()
  let script =<< trim END
    #" uri_encode(): everything except [0-9A-Za-z] and "-_.~" becomes %HH
    call assert_equal('a1%20b2', uri_encode('a1 b2'))
    call assert_equal('-%3F%26%2F%23%2B%3D%3A%5B%5D%40-', uri_encode('-?&/#+=:[]@-'))
    call assert_equal('%22%3C%3E%5E%60%7B%7C%7D', uri_encode('"<>^`{|}'))
    call assert_equal('%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5', 'αβγδε'->uri_encode())
    call assert_equal('r%C3%A9sum%C3%A9', uri_encode('résumé'))
    call assert_equal('%E4%BD%A0%E5%A5%BD', uri_encode('你好'))
    call assert_equal('%F0%9F%98%8A%F0%9F%98%8A', uri_encode('😊😊'))
    call assert_equal('-_.~', uri_encode('-_.~'))
    call assert_equal('', uri_encode(''))
    call assert_equal('%2520%2523', uri_encode('%20%23'))
    call assert_equal('', uri_encode(test_null_string()))
    call assert_equal('a', uri_encode('a'))
    call assert_equal('%20', uri_encode(' '))
    call assert_equal('%CE%B1', uri_encode('α'))
    call assert_equal('c%3A%5Cmy%5Cdir%5Ca%20b%20c', uri_encode('c:\my\dir\a b c'))
    call assert_fails('call uri_encode([])', 'E1174: String required for argument 1')

    #" uri_decode(): the reverse mapping, plus malformed "%" sequences
    call assert_equal('a1 b2', uri_decode('a1%20b2'))
    call assert_equal('-?&/#+=:[]@-', uri_decode('-%3F%26%2F%23%2B%3D%3A%5B%5D%40-'))
    call assert_equal('"<>^`{|}', uri_decode('%22%3C%3E%5E%60%7B%7C%7D'))
    call assert_equal('αβγδε', '%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5'->uri_decode())
    call assert_equal('résumé', uri_decode('r%C3%A9sum%C3%A9'))
    call assert_equal('你好', uri_decode('%E4%BD%A0%E5%A5%BD'))
    call assert_equal('😊😊', uri_decode('%F0%9F%98%8A%F0%9F%98%8A'))
    call assert_equal('a+b', uri_decode('a+b'))
    call assert_equal('-_.~', uri_decode('-_.~'))
    call assert_equal('', uri_decode(''))
    call assert_equal('%20%23', uri_decode('%2520%2523'))
    call assert_equal('', uri_decode(test_null_string()))
    call assert_equal('a', uri_decode('a'))
    call assert_equal(' ', uri_decode('%20'))
    call assert_equal('α', uri_decode('%CE%B1'))
    call assert_equal('c:\my\dir\a b c', uri_decode('c%3A%5Cmy%5Cdir%5Ca%20b%20c'))
    call assert_equal('%', uri_decode('%'))
    call assert_equal('%3', uri_decode('%3'))
    call assert_equal(';', uri_decode('%3b'))
    call assert_equal('a%xyb', uri_decode('a%xyb'))
    call assert_fails('call uri_decode([])', 'E1174: String required for argument 1')

    #" control characters 0x01 .. 0x1F must survive a round trip
    VAR ctrl = "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10"
    LET ctrl ..= "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
    VAR encoded = ''
    for code in range(1, 31)
      LET encoded ..= printf("%%%02X", code)
    endfor
    call assert_equal(encoded, uri_encode(ctrl))
    call assert_equal(ctrl, uri_decode(encoded))
  END
  call v9.CheckLegacyAndVim9Success(script)
endfunc
|
||||||
|
|
||||||
" vim: shiftwidth=2 sts=2 expandtab
|
" vim: shiftwidth=2 sts=2 expandtab
|
||||||
|
@@ -724,6 +724,8 @@ static char *(features[]) =
|
|||||||
|
|
||||||
static int included_patches[] =
|
static int included_patches[] =
|
||||||
{ /* Add new patch number below this line */
|
{ /* Add new patch number below this line */
|
||||||
|
/**/
|
||||||
|
1669,
|
||||||
/**/
|
/**/
|
||||||
1668,
|
1668,
|
||||||
/**/
|
/**/
|
||||||
|
Reference in New Issue
Block a user