mirror of
https://github.com/vim/vim.git
synced 2025-08-31 20:53:42 -04:00
patch 8.0.0519: character classes are not well tested
Problem: Character classes are not well tested. They can differ between platforms. Solution: Add tests. In the documentation make clear which classes depend on what library function. Only use :cntrl: and :graph: for ASCII. (Kazunobu Kuriyama, Dominique Pelle, closes #1560) Update the documentation.
This commit is contained in:
parent
c6cd8409c2
commit
0c078fc7db
@ -1085,25 +1085,27 @@ x A single character, with no special meaning, matches itself
|
||||
- A character class expression is evaluated to the set of characters
|
||||
belonging to that character class. The following character classes
|
||||
are supported:
|
||||
Name Contents ~
|
||||
*[:alnum:]* [:alnum:] ASCII letters and digits
|
||||
*[:alpha:]* [:alpha:] ASCII letters
|
||||
*[:blank:]* [:blank:] space and tab characters
|
||||
*[:cntrl:]* [:cntrl:] control characters
|
||||
*[:digit:]* [:digit:] decimal digits
|
||||
*[:graph:]* [:graph:] printable characters excluding space
|
||||
*[:lower:]* [:lower:] lowercase letters (all letters when
|
||||
Name Func Contents ~
|
||||
*[:alnum:]* [:alnum:] isalnum ASCII letters and digits
|
||||
*[:alpha:]* [:alpha:] isalpha ASCII letters
|
||||
*[:blank:]* [:blank:] space and tab
|
||||
*[:cntrl:]* [:cntrl:] iscntrl ASCII control characters
|
||||
*[:digit:]* [:digit:] decimal digits '0' to '9'
|
||||
*[:graph:]* [:graph:] isgraph ASCII printable characters excluding
|
||||
space
|
||||
*[:lower:]* [:lower:] (1) lowercase letters (all letters when
|
||||
'ignorecase' is used)
|
||||
*[:print:]* [:print:] printable characters including space
|
||||
*[:punct:]* [:punct:] ASCII punctuation characters
|
||||
*[:space:]* [:space:] whitespace characters
|
||||
*[:upper:]* [:upper:] uppercase letters (all letters when
|
||||
*[:print:]* [:print:] (2) printable characters including space
|
||||
*[:punct:]* [:punct:] ispunct ASCII punctuation characters
|
||||
*[:space:]* [:space:] whitespace characters: space, tab, CR,
|
||||
NL, vertical tab, form feed
|
||||
*[:upper:]* [:upper:] (3) uppercase letters (all letters when
|
||||
'ignorecase' is used)
|
||||
*[:xdigit:]* [:xdigit:] hexadecimal digits
|
||||
*[:return:]* [:return:] the <CR> character
|
||||
*[:tab:]* [:tab:] the <Tab> character
|
||||
*[:escape:]* [:escape:] the <Esc> character
|
||||
*[:backspace:]* [:backspace:] the <BS> character
|
||||
*[:xdigit:]* [:xdigit:] hexadecimal digits: 0-9, a-f, A-F
|
||||
*[:return:]* [:return:] the <CR> character
|
||||
*[:tab:]* [:tab:] the <Tab> character
|
||||
*[:escape:]* [:escape:] the <Esc> character
|
||||
*[:backspace:]* [:backspace:] the <BS> character
|
||||
The brackets in character class expressions are additional to the
|
||||
brackets delimiting a collection. For example, the following is a
|
||||
plausible pattern for a UNIX filename: "[-./[:alnum:]_~]\+" That is,
|
||||
@ -1114,6 +1116,13 @@ x A single character, with no special meaning, matches itself
|
||||
regexp engine. See |two-engines|. In the future these items may
|
||||
work for multi-byte characters. For now, to get all "alpha"
|
||||
characters you can use: [[:lower:][:upper:]].
|
||||
|
||||
The "Func" column shows what library function is used. The
|
||||
implementation depends on the system. Otherwise:
|
||||
(1) Uses islower() for ASCII and Vim builtin rules for other
|
||||
characters when built with the |+multi_byte| feature.
|
||||
(2) Uses Vim builtin rules.
|
||||
(3) As with (1) but using isupper().
|
||||
*/[[=* *[==]*
|
||||
- An equivalence class. This means that characters are matched that
|
||||
have almost the same meaning, e.g., when ignoring accents. This
|
||||
|
@ -2555,17 +2555,17 @@ collection:
|
||||
regc('\t');
|
||||
break;
|
||||
case CLASS_CNTRL:
|
||||
for (cu = 1; cu <= 255; cu++)
|
||||
for (cu = 1; cu <= 127; cu++)
|
||||
if (iscntrl(cu))
|
||||
regmbc(cu);
|
||||
break;
|
||||
case CLASS_DIGIT:
|
||||
for (cu = 1; cu <= 255; cu++)
|
||||
for (cu = 1; cu <= 127; cu++)
|
||||
if (VIM_ISDIGIT(cu))
|
||||
regmbc(cu);
|
||||
break;
|
||||
case CLASS_GRAPH:
|
||||
for (cu = 1; cu <= 255; cu++)
|
||||
for (cu = 1; cu <= 127; cu++)
|
||||
if (isgraph(cu))
|
||||
regmbc(cu);
|
||||
break;
|
||||
|
@ -4871,7 +4871,7 @@ check_char_class(int class, int c)
|
||||
return OK;
|
||||
break;
|
||||
case NFA_CLASS_CNTRL:
|
||||
if (c >= 1 && c <= 255 && iscntrl(c))
|
||||
if (c >= 1 && c <= 127 && iscntrl(c))
|
||||
return OK;
|
||||
break;
|
||||
case NFA_CLASS_DIGIT:
|
||||
@ -4879,7 +4879,7 @@ check_char_class(int class, int c)
|
||||
return OK;
|
||||
break;
|
||||
case NFA_CLASS_GRAPH:
|
||||
if (c >= 1 && c <= 255 && isgraph(c))
|
||||
if (c >= 1 && c <= 127 && isgraph(c))
|
||||
return OK;
|
||||
break;
|
||||
case NFA_CLASS_LOWER:
|
||||
|
@ -38,12 +38,21 @@ func s:classes_test()
|
||||
set isprint=@,161-255
|
||||
call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))
|
||||
|
||||
let alphachars = ''
|
||||
let lowerchars = ''
|
||||
let upperchars = ''
|
||||
let alnumchars = ''
|
||||
let alphachars = ''
|
||||
let backspacechar = ''
|
||||
let blankchars = ''
|
||||
let cntrlchars = ''
|
||||
let digitchars = ''
|
||||
let escapechar = ''
|
||||
let graphchars = ''
|
||||
let lowerchars = ''
|
||||
let printchars = ''
|
||||
let punctchars = ''
|
||||
let returnchar = ''
|
||||
let spacechars = ''
|
||||
let tabchar = ''
|
||||
let upperchars = ''
|
||||
let xdigitchars = ''
|
||||
let i = 1
|
||||
while i <= 255
|
||||
@ -51,21 +60,48 @@ func s:classes_test()
|
||||
if c =~ '[[:alpha:]]'
|
||||
let alphachars .= c
|
||||
endif
|
||||
if c =~ '[[:lower:]]'
|
||||
let lowerchars .= c
|
||||
endif
|
||||
if c =~ '[[:upper:]]'
|
||||
let upperchars .= c
|
||||
endif
|
||||
if c =~ '[[:alnum:]]'
|
||||
let alnumchars .= c
|
||||
endif
|
||||
if c =~ '[[:backspace:]]'
|
||||
let backspacechar .= c
|
||||
endif
|
||||
if c =~ '[[:blank:]]'
|
||||
let blankchars .= c
|
||||
endif
|
||||
if c =~ '[[:cntrl:]]'
|
||||
let cntrlchars .= c
|
||||
endif
|
||||
if c =~ '[[:digit:]]'
|
||||
let digitchars .= c
|
||||
endif
|
||||
if c =~ '[[:escape:]]'
|
||||
let escapechar .= c
|
||||
endif
|
||||
if c =~ '[[:graph:]]'
|
||||
let graphchars .= c
|
||||
endif
|
||||
if c =~ '[[:lower:]]'
|
||||
let lowerchars .= c
|
||||
endif
|
||||
if c =~ '[[:print:]]'
|
||||
let printchars .= c
|
||||
endif
|
||||
if c =~ '[[:punct:]]'
|
||||
let punctchars .= c
|
||||
endif
|
||||
if c =~ '[[:return:]]'
|
||||
let returnchar .= c
|
||||
endif
|
||||
if c =~ '[[:space:]]'
|
||||
let spacechars .= c
|
||||
endif
|
||||
if c =~ '[[:tab:]]'
|
||||
let tabchar .= c
|
||||
endif
|
||||
if c =~ '[[:upper:]]'
|
||||
let upperchars .= c
|
||||
endif
|
||||
if c =~ '[[:xdigit:]]'
|
||||
let xdigitchars .= c
|
||||
endif
|
||||
@ -73,11 +109,22 @@ func s:classes_test()
|
||||
endwhile
|
||||
|
||||
call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
|
||||
call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
|
||||
call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
|
||||
call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
|
||||
call assert_equal("\b", backspacechar)
|
||||
call assert_equal("\t ", blankchars)
|
||||
" Commented out: it succeeds on Linux and Windows, but fails on macOS in Travis.
|
||||
" call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", cntrlchars)
|
||||
call assert_equal("0123456789", digitchars)
|
||||
call assert_equal("\<Esc>", escapechar)
|
||||
" Commented out: it succeeds on Linux and Windows, but fails on macOS in Travis.
|
||||
" call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', graphchars)
|
||||
call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
|
||||
call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', printchars)
|
||||
call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
|
||||
call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
|
||||
call assert_equal("\r", returnchar)
|
||||
call assert_equal("\t\n\x0b\f\r ", spacechars)
|
||||
call assert_equal("\t", tabchar)
|
||||
call assert_equal('0123456789ABCDEFabcdef', xdigitchars)
|
||||
endfunc
|
||||
|
||||
|
@ -764,6 +764,8 @@ static char *(features[]) =
|
||||
|
||||
static int included_patches[] =
|
||||
{ /* Add new patch number below this line */
|
||||
/**/
|
||||
519,
|
||||
/**/
|
||||
518,
|
||||
/**/
|
||||
|
Loading…
x
Reference in New Issue
Block a user