0
0
mirror of https://github.com/vim/vim.git synced 2025-09-23 03:43:49 -04:00

patch 7.4.1629

Problem:    Handling emoji characters as full width has problems with
            backwards compatibility.
Solution:   Remove ambiguous and double width characters from the emoji table.
            Use a separate table for the character class.
            (partly by Yasuhiro Matsumoto)
This commit is contained in:
Bram Moolenaar
2016-03-21 22:09:44 +01:00
parent 3f3fbd3fdb
commit b86f10ee10
3 changed files with 335 additions and 210 deletions

View File

@@ -32,8 +32,8 @@ func! ParseFoldProps()
if line !~ '^#' && line !~ '^\s*$'
let l = split(line, '\s*;\s*', 1)
if len(l) != 4
echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 4'
return
echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 4'
return
endif
call add(s:foldprops, l)
endif
@@ -50,8 +50,8 @@ func! ParseWidthProps()
if line !~ '^#' && line !~ '^\s*$'
let l = split(line, '\s*;\s*', 1)
if len(l) != 2
echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 2'
return
echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 2'
return
endif
call add(s:widthprops, l)
endif
@@ -72,18 +72,18 @@ func! BuildCaseTable(name, index)
let n = ('0x' . p[0]) + 0
let nl = ('0x' . p[a:index]) + 0
if start >= 0 && add == nl - n && (step == 0 || n - end == step)
" continue with same range.
let step = n - end
let end = n
" continue with same range.
let step = n - end
let end = n
else
if start >= 0
" produce previous range
call Range(ranges, start, end, step, add)
endif
let start = n
let end = n
let step = 0
let add = nl - n
if start >= 0
" produce previous range
call Range(ranges, start, end, step, add)
endif
let start = n
let end = n
let step = 0
let add = nl - n
endif
endif
endfor
@@ -115,18 +115,18 @@ func! BuildFoldTable()
let n = ('0x' . p[0]) + 0
let nl = ('0x' . p[2]) + 0
if start >= 0 && add == nl - n && (step == 0 || n - end == step)
" continue with same range.
let step = n - end
let end = n
" continue with same range.
let step = n - end
let end = n
else
if start >= 0
" produce previous range
call Range(ranges, start, end, step, add)
endif
let start = n
let end = n
let step = 0
let add = nl - n
if start >= 0
" produce previous range
call Range(ranges, start, end, step, add)
endif
let start = n
let end = n
let step = 0
let add = nl - n
endif
endif
endfor
@@ -160,15 +160,15 @@ func! BuildCombiningTable()
if p[2] == 'Mn' || p[2] == 'Mc' || p[2] == 'Me'
let n = ('0x' . p[0]) + 0
if start >= 0 && end + 1 == n
" continue with same range.
let end = n
" continue with same range.
let end = n
else
if start >= 0
" produce previous range
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
endif
let start = n
let end = n
if start >= 0
" produce previous range
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
endif
let start = n
let end = n
endif
endif
endfor
@@ -197,47 +197,57 @@ func! BuildWidthTable(pattern, tableName)
for p in s:widthprops
if p[1][0] =~ a:pattern
if p[0] =~ '\.\.'
" It is a range. we don't check for composing char then.
let rng = split(p[0], '\.\.')
if len(rng) != 2
echoerr "Cannot parse range: '" . p[0] . "' in width table"
endif
let n = ('0x' . rng[0]) + 0
let n_last = ('0x' . rng[1]) + 0
" It is a range. we don't check for composing char then.
let rng = split(p[0], '\.\.')
if len(rng) != 2
echoerr "Cannot parse range: '" . p[0] . "' in width table"
endif
let n = ('0x' . rng[0]) + 0
let n_last = ('0x' . rng[1]) + 0
else
let n = ('0x' . p[0]) + 0
let n_last = n
let n = ('0x' . p[0]) + 0
let n_last = n
endif
" Find this char in the data table.
while 1
let dn = ('0x' . s:dataprops[dataidx][0]) + 0
if dn >= n
break
endif
let dataidx += 1
let dn = ('0x' . s:dataprops[dataidx][0]) + 0
if dn >= n
break
endif
let dataidx += 1
endwhile
if dn != n && n_last == n
echoerr "Cannot find character " . n . " in data table"
echoerr "Cannot find character " . n . " in data table"
endif
" Only use the char when it's not a composing char.
" But use all chars from a range.
let dp = s:dataprops[dataidx]
if n_last > n || (dp[2] != 'Mn' && dp[2] != 'Mc' && dp[2] != 'Me')
if start >= 0 && end + 1 == n
" continue with same range.
else
if start >= 0
" produce previous range
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
endif
let start = n
endif
let end = n_last
if start >= 0 && end + 1 == n
" continue with same range.
else
if start >= 0
" produce previous range
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
if a:pattern == 'A'
call add(s:ambitable, [start, end])
else
call add(s:doubletable, [start, end])
endif
endif
let start = n
endif
let end = n_last
endif
endif
endfor
if start >= 0
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
if a:pattern == 'A'
call add(s:ambitable, [start, end])
else
call add(s:doubletable, [start, end])
endif
endif
" New buffer to put the result in.
@@ -253,21 +263,72 @@ endfunc
" Build the emoji width table in a new buffer.
func! BuildEmojiTable(pattern, tableName)
let ranges = []
for line in map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")')
let alltokens = []
let widthtokens = []
let lines = map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")')
for n in range(len(lines))
let line = lines[n]
let token = split(line, '\.\.')
let first = ('0x' . token[0]) + 0
if len(token) == 1
call add(token, token[0])
let last = first
else
let last = ('0x' . token[1]) + 0
endif
let token = [first, last]
if len(alltokens) > 0 && (token[0] - 1 == alltokens[-1][1])
let alltokens[-1][1] = token[1]
else
call add(alltokens, token)
endif
" exclude characters that are in the "ambiguous" or "doublewidth" table
for ambi in s:ambitable
if first >= ambi[0] && first <= ambi[1]
let first = ambi[1] + 1
endif
if last >= ambi[0] && last <= ambi[1]
let last = ambi[0] - 1
endif
endfor
for double in s:doubletable
if first >= double[0] && first <= double[1]
let first = double[1] + 1
endif
if last >= double[0] && last <= double[1]
let last = double[0] - 1
endif
endfor
if first <= last
let token = [first, last]
if len(widthtokens) > 0 && (token[0] - 1 == widthtokens[-1][1])
let widthtokens[-1][1] = token[1]
else
call add(widthtokens, token)
endif
endif
call add(ranges, printf("\t{0x%04x, 0x%04x},", "0x".token[0], "0x".token[1]))
endfor
let allranges = map(alltokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])')
let widthranges = map(widthtokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])')
" New buffer to put the result in.
new
exe "file " . a:tableName
call setline(1, " static struct interval " . a:tableName . "[] =")
exe "file " . a:tableName . '_all'
call setline(1, " static struct interval " . a:tableName . "_all[] =")
call setline(2, " {")
call append('$', ranges)
call append('$', allranges)
call setline('$', getline('$')[:-2]) " remove last comma
call setline(line('$') + 1, " };")
wincmd p
" New buffer to put the result in.
new
exe "file " . a:tableName . '_width'
call setline(1, " static struct interval " . a:tableName . "_width[] =")
call setline(2, " {")
call append('$', widthranges)
call setline('$', getline('$')[:-2]) " remove last comma
call setline(line('$') + 1, " };")
wincmd p
@@ -307,13 +368,16 @@ edit http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
call ParseWidthProps()
" Build the double width table.
" BuildWidthTable() also appends every range it emits for a pattern other than
" 'A' to s:doubletable, so the list has to exist (and be empty) before the call.
let s:doubletable = []
call BuildWidthTable('[WF]', 'doublewidth')
" Build the ambiguous width table.
" With pattern 'A' the emitted ranges are collected in s:ambitable instead.
let s:ambitable = []
call BuildWidthTable('A', 'ambiguous')
" Edit the emoji text file. Requires the netrw plugin.
edit http://www.unicode.org/Public/emoji/3.0/emoji-data.txt
" Build the emoji table. Ver. 1.0 - 6.0
" Must come after the "ambiguous" and "doublewidth" tables: BuildEmojiTable()
" reads s:ambitable and s:doubletable to leave those ranges out of the
" "_width" table it generates.
call BuildEmojiTable('; Emoji\s\+# [1-6]\.[0-9]', 'emoji')

View File

@@ -1210,148 +1210,6 @@ intable(struct interval *table, size_t size, int c)
return FALSE;
}
/* Sorted list of non-overlapping intervals of Emoji characters,
* based on http://unicode.org/emoji/charts/emoji-list.html */
static struct interval emoji_tab[] =
{
{0x203c, 0x203c},
{0x2049, 0x2049},
{0x2122, 0x2122},
{0x2139, 0x2139},
{0x2194, 0x2199},
{0x21a9, 0x21aa},
{0x231a, 0x231b},
{0x2328, 0x2328},
{0x23cf, 0x23cf},
{0x23e9, 0x23f3},
{0x24c2, 0x24c2},
{0x25aa, 0x25ab},
{0x25b6, 0x25b6},
{0x25c0, 0x25c0},
{0x25fb, 0x25fe},
{0x2600, 0x2604},
{0x260e, 0x260e},
{0x2611, 0x2611},
{0x2614, 0x2615},
{0x2618, 0x2618},
{0x261d, 0x261d},
{0x2620, 0x2620},
{0x2622, 0x2623},
{0x2626, 0x2626},
{0x262a, 0x262a},
{0x262e, 0x262f},
{0x2638, 0x263a},
{0x2648, 0x2653},
{0x2660, 0x2660},
{0x2663, 0x2663},
{0x2665, 0x2666},
{0x2668, 0x2668},
{0x267b, 0x267b},
{0x267f, 0x267f},
{0x2692, 0x2694},
{0x2696, 0x2697},
{0x2699, 0x2699},
{0x269b, 0x269c},
{0x26a0, 0x26a1},
{0x26aa, 0x26ab},
{0x26b0, 0x26b1},
{0x26bd, 0x26be},
{0x26c4, 0x26c5},
{0x26c8, 0x26c8},
{0x26ce, 0x26ce},
{0x26cf, 0x26cf},
{0x26d1, 0x26d1},
{0x26d3, 0x26d4},
{0x26e9, 0x26ea},
{0x26f0, 0x26f5},
{0x26f7, 0x26fa},
{0x26fd, 0x26fd},
{0x2702, 0x2702},
{0x2705, 0x2705},
{0x2708, 0x2709},
{0x270a, 0x270b},
{0x270c, 0x270d},
{0x270f, 0x270f},
{0x2712, 0x2712},
{0x2714, 0x2714},
{0x2716, 0x2716},
{0x271d, 0x271d},
{0x2721, 0x2721},
{0x2728, 0x2728},
{0x2733, 0x2734},
{0x2744, 0x2744},
{0x2747, 0x2747},
{0x274c, 0x274c},
{0x274e, 0x274e},
{0x2753, 0x2755},
{0x2757, 0x2757},
{0x2763, 0x2764},
{0x2795, 0x2797},
{0x27a1, 0x27a1},
{0x27b0, 0x27b0},
{0x27bf, 0x27bf},
{0x2934, 0x2935},
{0x2b05, 0x2b07},
{0x2b1b, 0x2b1c},
{0x2b50, 0x2b50},
{0x2b55, 0x2b55},
{0x3030, 0x3030},
{0x303d, 0x303d},
{0x3297, 0x3297},
{0x3299, 0x3299},
{0x1f004, 0x1f004},
{0x1f0cf, 0x1f0cf},
{0x1f170, 0x1f171},
{0x1f17e, 0x1f17e},
{0x1f17f, 0x1f17f},
{0x1f18e, 0x1f18e},
{0x1f191, 0x1f19a},
{0x1f1e6, 0x1f1ff},
{0x1f201, 0x1f202},
{0x1f21a, 0x1f21a},
{0x1f22f, 0x1f22f},
{0x1f232, 0x1f23a},
{0x1f250, 0x1f251},
{0x1f300, 0x1f320},
{0x1f330, 0x1f335},
{0x1f337, 0x1f37c},
{0x1f380, 0x1f393},
{0x1f3a0, 0x1f3c4},
{0x1f3c6, 0x1f3ca},
{0x1f3e0, 0x1f3f0},
{0x1f400, 0x1f43e},
{0x1f440, 0x1f440},
{0x1f442, 0x1f4f7},
{0x1f4f9, 0x1f4fc},
{0x1f500, 0x1f53d},
{0x1f550, 0x1f567},
{0x1f5fb, 0x1f5ff},
{0x1f600, 0x1f600},
{0x1f601, 0x1f610},
{0x1f611, 0x1f611},
{0x1f612, 0x1f614},
{0x1f615, 0x1f615},
{0x1f616, 0x1f616},
{0x1f617, 0x1f617},
{0x1f618, 0x1f618},
{0x1f619, 0x1f619},
{0x1f61a, 0x1f61a},
{0x1f61b, 0x1f61b},
{0x1f61c, 0x1f61e},
{0x1f61f, 0x1f61f},
{0x1f620, 0x1f625},
{0x1f626, 0x1f627},
{0x1f628, 0x1f62b},
{0x1f62c, 0x1f62c},
{0x1f62d, 0x1f62d},
{0x1f62e, 0x1f62f},
{0x1f630, 0x1f633},
{0x1f634, 0x1f634},
{0x1f635, 0x1f640},
{0x1f645, 0x1f64f},
{0x1f680, 0x1f6c5}
};
/*
* For UTF-8 character "c" return 2 for a double-width character, 1 for others.
* Returns 4 or 6 for an unprintable character.
@@ -1577,6 +1435,90 @@ utf_char2cells(int c)
{0x100000, 0x10fffd}
};
/* Sorted list of non-overlapping intervals of Emoji characters that don't
* have ambiguous or double width,
* based on http://unicode.org/emoji/charts/emoji-list.html */
static struct interval emoji_width[] =
{
{0x203c, 0x203c},
{0x2049, 0x2049},
{0x2139, 0x2139},
{0x21a9, 0x21aa},
{0x231a, 0x231b},
{0x2328, 0x2328},
{0x23cf, 0x23cf},
{0x23e9, 0x23f3},
{0x25aa, 0x25ab},
{0x25fb, 0x25fe},
{0x2600, 0x2604},
{0x2611, 0x2611},
{0x2618, 0x2618},
{0x261d, 0x261d},
{0x2620, 0x2620},
{0x2622, 0x2623},
{0x2626, 0x2626},
{0x262a, 0x262a},
{0x262e, 0x262f},
{0x2638, 0x263a},
{0x2648, 0x2653},
{0x2666, 0x2666},
{0x267b, 0x267b},
{0x267f, 0x267f},
{0x2692, 0x2694},
{0x2696, 0x2697},
{0x2699, 0x2699},
{0x269b, 0x269c},
{0x26a0, 0x26a1},
{0x26aa, 0x26ab},
{0x26b0, 0x26b1},
{0x26bd, 0x26bd},
{0x26ce, 0x26ce},
{0x2702, 0x2702},
{0x2705, 0x2705},
{0x2708, 0x270d},
{0x270f, 0x270f},
{0x2712, 0x2712},
{0x2714, 0x2714},
{0x2716, 0x2716},
{0x271d, 0x271d},
{0x2721, 0x2721},
{0x2728, 0x2728},
{0x2733, 0x2734},
{0x2744, 0x2744},
{0x2747, 0x2747},
{0x274c, 0x274c},
{0x274e, 0x274e},
{0x2753, 0x2755},
{0x2763, 0x2764},
{0x2795, 0x2797},
{0x27a1, 0x27a1},
{0x27b0, 0x27b0},
{0x27bf, 0x27bf},
{0x2934, 0x2935},
{0x2b05, 0x2b07},
{0x2b1b, 0x2b1c},
{0x2b50, 0x2b50},
{0x1f004, 0x1f004},
{0x1f0cf, 0x1f0cf},
{0x1f1e6, 0x1f1ff},
{0x1f300, 0x1f320},
{0x1f330, 0x1f335},
{0x1f337, 0x1f37c},
{0x1f380, 0x1f393},
{0x1f3a0, 0x1f3c4},
{0x1f3c6, 0x1f3ca},
{0x1f3e0, 0x1f3f0},
{0x1f400, 0x1f43e},
{0x1f440, 0x1f440},
{0x1f442, 0x1f4f7},
{0x1f4f9, 0x1f4fc},
{0x1f500, 0x1f53d},
{0x1f550, 0x1f567},
{0x1f5fb, 0x1f640},
{0x1f645, 0x1f64f},
{0x1f680, 0x1f6c5}
};
if (c >= 0x100)
{
#ifdef USE_WCHAR_FUNCTIONS
@@ -1596,7 +1538,7 @@ utf_char2cells(int c)
if (intable(doublewidth, sizeof(doublewidth), c))
return 2;
#endif
if (p_emoji && intable(emoji_tab, sizeof(emoji_tab), c))
if (p_emoji && intable(emoji_width, sizeof(emoji_width), c))
return 2;
}
@@ -2674,6 +2616,123 @@ utf_class(int c)
{0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */
{0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */
};
/* Sorted list of non-overlapping intervals of all Emoji characters,
* based on http://unicode.org/emoji/charts/emoji-list.html */
static struct interval emoji_all[] =
{
{0x203c, 0x203c},
{0x2049, 0x2049},
{0x2122, 0x2122},
{0x2139, 0x2139},
{0x2194, 0x2199},
{0x21a9, 0x21aa},
{0x231a, 0x231b},
{0x2328, 0x2328},
{0x23cf, 0x23cf},
{0x23e9, 0x23f3},
{0x24c2, 0x24c2},
{0x25aa, 0x25ab},
{0x25b6, 0x25b6},
{0x25c0, 0x25c0},
{0x25fb, 0x25fe},
{0x2600, 0x2604},
{0x260e, 0x260e},
{0x2611, 0x2611},
{0x2614, 0x2615},
{0x2618, 0x2618},
{0x261d, 0x261d},
{0x2620, 0x2620},
{0x2622, 0x2623},
{0x2626, 0x2626},
{0x262a, 0x262a},
{0x262e, 0x262f},
{0x2638, 0x263a},
{0x2648, 0x2653},
{0x2660, 0x2660},
{0x2663, 0x2663},
{0x2665, 0x2666},
{0x2668, 0x2668},
{0x267b, 0x267b},
{0x267f, 0x267f},
{0x2692, 0x2694},
{0x2696, 0x2697},
{0x2699, 0x2699},
{0x269b, 0x269c},
{0x26a0, 0x26a1},
{0x26aa, 0x26ab},
{0x26b0, 0x26b1},
{0x26bd, 0x26be},
{0x26c4, 0x26c5},
{0x26c8, 0x26c8},
{0x26ce, 0x26cf},
{0x26d1, 0x26d1},
{0x26d3, 0x26d4},
{0x26e9, 0x26ea},
{0x26f0, 0x26f5},
{0x26f7, 0x26fa},
{0x26fd, 0x26fd},
{0x2702, 0x2702},
{0x2705, 0x2705},
{0x2708, 0x270d},
{0x270f, 0x270f},
{0x2712, 0x2712},
{0x2714, 0x2714},
{0x2716, 0x2716},
{0x271d, 0x271d},
{0x2721, 0x2721},
{0x2728, 0x2728},
{0x2733, 0x2734},
{0x2744, 0x2744},
{0x2747, 0x2747},
{0x274c, 0x274c},
{0x274e, 0x274e},
{0x2753, 0x2755},
{0x2757, 0x2757},
{0x2763, 0x2764},
{0x2795, 0x2797},
{0x27a1, 0x27a1},
{0x27b0, 0x27b0},
{0x27bf, 0x27bf},
{0x2934, 0x2935},
{0x2b05, 0x2b07},
{0x2b1b, 0x2b1c},
{0x2b50, 0x2b50},
{0x2b55, 0x2b55},
{0x3030, 0x3030},
{0x303d, 0x303d},
{0x3297, 0x3297},
{0x3299, 0x3299},
{0x1f004, 0x1f004},
{0x1f0cf, 0x1f0cf},
{0x1f170, 0x1f171},
{0x1f17e, 0x1f17f},
{0x1f18e, 0x1f18e},
{0x1f191, 0x1f19a},
{0x1f1e6, 0x1f1ff},
{0x1f201, 0x1f202},
{0x1f21a, 0x1f21a},
{0x1f22f, 0x1f22f},
{0x1f232, 0x1f23a},
{0x1f250, 0x1f251},
{0x1f300, 0x1f320},
{0x1f330, 0x1f335},
{0x1f337, 0x1f37c},
{0x1f380, 0x1f393},
{0x1f3a0, 0x1f3c4},
{0x1f3c6, 0x1f3ca},
{0x1f3e0, 0x1f3f0},
{0x1f400, 0x1f43e},
{0x1f440, 0x1f440},
{0x1f442, 0x1f4f7},
{0x1f4f9, 0x1f4fc},
{0x1f500, 0x1f53d},
{0x1f550, 0x1f567},
{0x1f5fb, 0x1f640},
{0x1f645, 0x1f64f},
{0x1f680, 0x1f6c5}
};
int bot = 0;
int top = sizeof(classes) / sizeof(struct clinterval) - 1;
int mid;
@@ -2701,7 +2760,7 @@ utf_class(int c)
}
/* emoji */
if (intable(emoji_tab, sizeof(emoji_tab), c))
if (intable(emoji_all, sizeof(emoji_all), c))
return 3;
/* most other characters are "word" characters */

View File

@@ -748,6 +748,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
1629,
/**/
1628,
/**/