1
0
forked from aniani/vim

updated for version 7.0169

This commit is contained in:
Bram Moolenaar 2005-12-13 20:02:15 +00:00
parent 900b4d77f0
commit b0bf8580c3
6 changed files with 370 additions and 200 deletions

View File

@ -1,4 +1,4 @@
*editing.txt* For Vim version 7.0aa. Last change: 2005 Jul 25 *editing.txt* For Vim version 7.0aa. Last change: 2005 Dec 13
VIM REFERENCE MANUAL by Bram Moolenaar VIM REFERENCE MANUAL by Bram Moolenaar
@ -398,8 +398,9 @@ The expression can contain just about anything, thus this can also be used to
avoid the special meaning of '"', '|', '%' and '#'. avoid the special meaning of '"', '|', '%' and '#'.
*++opt* *[++opt]* *++opt* *[++opt]*
The [++opt] argument can be used to force the value of 'fileformat' or The [++opt] argument can be used to force the value of 'fileformat',
'fileencoding' to a value for one command. The form is: > 'fileencoding' or 'binary' to a value for one command, and to specify the
behavior for bad characters. The form is: >
++{optname}={value} ++{optname}={value}
Where {optname} is one of: *++ff* *++enc* *++bin* *++nobin* Where {optname} is one of: *++ff* *++enc* *++bin* *++nobin*
@ -407,6 +408,7 @@ Where {optname} is one of: *++ff* *++enc* *++bin* *++nobin*
enc or encoding overrides 'fileencoding' enc or encoding overrides 'fileencoding'
bin or binary sets 'binary' bin or binary sets 'binary'
nobin or nobinary resets 'binary' nobin or nobinary resets 'binary'
bad specifies behavior for bad characters
{value} cannot contain white space. It can be any valid value for these {value} cannot contain white space. It can be any valid value for these
options. Examples: > options. Examples: >
@ -416,12 +418,24 @@ This edits the same file again with 'fileformat' set to "unix". >
:w ++enc=latin1 newfile :w ++enc=latin1 newfile
This writes the current buffer to "newfile" in latin1 format. This writes the current buffer to "newfile" in latin1 format.
There may be several ++opt arguments, separated by white space. They must all
appear before any |+cmd| argument.
*++bad*
The argument of "++bad=" specifies what happens with characters that can't be
converted and illegal bytes. It can be one of three things:
++bad=X A single-byte character that replaces each bad character.
++bad=keep Keep bad characters without conversion. Note that this may
result in illegal bytes in your text!
++bad=drop Remove the bad characters.
The default is like "++bad=?": Replace each bad character with a question
mark.
Note that when reading, the 'fileformat' and 'fileencoding' options will be Note that when reading, the 'fileformat' and 'fileencoding' options will be
set to the used format. When writing this doesn't happen, thus a next write set to the used format. When writing this doesn't happen, thus a next write
will use the old value of the option. Same for the 'binary' option. will use the old value of the option. Same for the 'binary' option.
There may be several ++opt arguments, separated by white space. They must all
appear before any |+cmd| argument.
*+cmd* *[+cmd]* *+cmd* *[+cmd]*
The [+cmd] argument can be used to position the cursor in the newly opened The [+cmd] argument can be used to position the cursor in the newly opened

View File

@ -4549,6 +4549,11 @@ getargopt(eap)
arg += 8; arg += 8;
pp = &eap->force_enc; pp = &eap->force_enc;
} }
else if (STRNCMP(arg, "bad", 3) == 0)
{
arg += 3;
pp = &eap->bad_char;
}
#endif #endif
if (pp == NULL || *arg != '=') if (pp == NULL || *arg != '=')
@ -4568,12 +4573,26 @@ getargopt(eap)
return FAIL; return FAIL;
#ifdef FEAT_MBYTE #ifdef FEAT_MBYTE
} }
else else if (pp == &eap->force_enc)
{ {
/* Make 'fileencoding' lower case. */ /* Make 'fileencoding' lower case. */
for (p = eap->cmd + eap->force_enc; *p != NUL; ++p) for (p = eap->cmd + eap->force_enc; *p != NUL; ++p)
*p = TOLOWER_ASC(*p); *p = TOLOWER_ASC(*p);
} }
else
{
/* Check ++bad= argument. Must be a single-byte character, "keep" or
* "drop". */
p = eap->cmd + eap->bad_char;
if (STRICMP(p, "keep") == 0)
eap->bad_char = BAD_KEEP;
else if (STRICMP(p, "drop") == 0)
eap->bad_char = BAD_DROP;
else if (MB_BYTE2LEN(*p) == 1 && p[1] == NUL)
eap->bad_char = *p;
else
return FAIL;
}
#endif #endif
return OK; return OK;

View File

@ -124,6 +124,7 @@ struct bw_info
static int buf_write_bytes __ARGS((struct bw_info *ip)); static int buf_write_bytes __ARGS((struct bw_info *ip));
#ifdef FEAT_MBYTE #ifdef FEAT_MBYTE
static linenr_T readfile_linenr __ARGS((linenr_T linecnt, char_u *p, char_u *endp));
static int ucs2bytes __ARGS((unsigned c, char_u **pp, int flags)); static int ucs2bytes __ARGS((unsigned c, char_u **pp, int flags));
static int same_encoding __ARGS((char_u *a, char_u *b)); static int same_encoding __ARGS((char_u *a, char_u *b));
static int get_fio_flags __ARGS((char_u *ptr)); static int get_fio_flags __ARGS((char_u *ptr));
@ -138,6 +139,7 @@ static int get_mac_fio_flags __ARGS((char_u *ptr));
#endif #endif
static int move_lines __ARGS((buf_T *frombuf, buf_T *tobuf)); static int move_lines __ARGS((buf_T *frombuf, buf_T *tobuf));
void void
filemess(buf, name, s, attr) filemess(buf, name, s, attr)
buf_T *buf; buf_T *buf;
@ -257,10 +259,13 @@ readfile(fname, sfname, from, lines_to_skip, lines_to_read, eap, flags)
int file_rewind = FALSE; int file_rewind = FALSE;
#ifdef FEAT_MBYTE #ifdef FEAT_MBYTE
int can_retry; int can_retry;
int conv_error = FALSE; /* conversion error detected */ linenr_T conv_error = 0; /* line nr with conversion error */
linenr_T illegal_byte = 0; /* line nr with illegal byte */
int keep_dest_enc = FALSE; /* don't retry when char doesn't fit int keep_dest_enc = FALSE; /* don't retry when char doesn't fit
in destination encoding */ in destination encoding */
linenr_T illegal_byte = 0; /* line nr with illegal byte */ int bad_char_behavior = BAD_REPLACE;
/* BAD_KEEP, BAD_DROP or character to
* replace with */
char_u *tmpname = NULL; /* name of 'charconvert' output file */ char_u *tmpname = NULL; /* name of 'charconvert' output file */
int fio_flags = 0; int fio_flags = 0;
char_u *fenc; /* fileencoding to use */ char_u *fenc; /* fileencoding to use */
@ -754,13 +759,18 @@ readfile(fname, sfname, from, lines_to_skip, lines_to_read, eap, flags)
linecnt = curbuf->b_ml.ml_line_count; linecnt = curbuf->b_ml.ml_line_count;
#ifdef FEAT_MBYTE #ifdef FEAT_MBYTE
/* "++bad=" argument. */
if (eap != NULL && eap->bad_char != 0)
bad_char_behavior = eap->bad_char;
/* /*
* Decide which 'encoding' to use first. * Decide which 'encoding' to use or use first.
*/ */
if (eap != NULL && eap->force_enc != 0) if (eap != NULL && eap->force_enc != 0)
{ {
fenc = enc_canonize(eap->cmd + eap->force_enc); fenc = enc_canonize(eap->cmd + eap->force_enc);
fenc_alloced = TRUE; fenc_alloced = TRUE;
keep_dest_enc = TRUE;
} }
else if (curbuf->b_p_bin) else if (curbuf->b_p_bin)
{ {
@ -864,7 +874,7 @@ retry:
#ifdef FEAT_MBYTE #ifdef FEAT_MBYTE
if (newfile) if (newfile)
curbuf->b_p_bomb = FALSE; curbuf->b_p_bomb = FALSE;
conv_error = FALSE; conv_error = 0;
#endif #endif
} }
@ -908,7 +918,7 @@ retry:
/* Conversion given with "++cc=" wasn't possible, read /* Conversion given with "++cc=" wasn't possible, read
* without conversion. */ * without conversion. */
notconverted = TRUE; notconverted = TRUE;
conv_error = FALSE; conv_error = 0;
if (fenc_alloced) if (fenc_alloced)
vim_free(fenc); vim_free(fenc);
fenc = (char_u *)""; fenc = (char_u *)"";
@ -1043,11 +1053,10 @@ retry:
} }
} }
/* Set can_retry when it's possible to rewind the file and try with /* Set "can_retry" when it's possible to rewind the file and try with
* another "fenc" value. It's FALSE when no other "fenc" to try, reading * another "fenc" value. It's FALSE when no other "fenc" to try, reading
* stdin or "fenc" was specified with "++enc=". */ * stdin or fixed at a specific encoding. */
can_retry = (*fenc != NUL && !read_stdin can_retry = (*fenc != NUL && !read_stdin && !keep_dest_enc);
&& (eap == NULL || eap->force_enc == 0));
#endif #endif
if (!skip_read) if (!skip_read)
@ -1229,8 +1238,30 @@ retry:
error = TRUE; error = TRUE;
#ifdef FEAT_MBYTE #ifdef FEAT_MBYTE
else if (conv_restlen > 0) else if (conv_restlen > 0)
/* some trailing bytes unconverted */ {
conv_error = TRUE; /* Reached end-of-file but some trailing bytes could
* not be converted. Truncated file? */
if (conv_error == 0)
conv_error = linecnt;
if (bad_char_behavior != BAD_DROP)
{
fio_flags = 0; /* don't convert this */
if (bad_char_behavior == BAD_KEEP)
{
/* Keep the trailing bytes as-is. */
size = conv_restlen;
ptr -= conv_restlen;
}
else
{
/* Replace the trailing bytes with the
* replacement character. */
size = 1;
*--ptr = bad_char_behavior;
}
conv_restlen = 0;
}
}
#endif #endif
} }
@ -1349,17 +1380,26 @@ retry:
== (size_t)-1 && ICONV_ERRNO != ICONV_EINVAL) == (size_t)-1 && ICONV_ERRNO != ICONV_EINVAL)
|| from_size > CONV_RESTLEN) || from_size > CONV_RESTLEN)
{ {
if (!keep_dest_enc && can_retry) if (can_retry)
goto rewind_retry; goto rewind_retry;
if (!keep_dest_enc) if (conv_error == 0)
conv_error = TRUE; conv_error = readfile_linenr(linecnt,
ptr, (char_u *)top);
/* Ignore a byte and try again. */ /* Deal with a bad byte and continue with the next. */
++fromp; ++fromp;
--from_size; --from_size;
*top++ = '?'; if (bad_char_behavior == BAD_KEEP)
{
*top++ = *(fromp - 1);
--to_size; --to_size;
} }
else if (bad_char_behavior != BAD_DROP)
{
*top++ = bad_char_behavior;
--to_size;
}
}
if (from_size > 0) if (from_size > 0)
{ {
@ -1379,142 +1419,168 @@ retry:
# ifdef WIN3264 # ifdef WIN3264
if (fio_flags & FIO_CODEPAGE) if (fio_flags & FIO_CODEPAGE)
{ {
char_u *src, *dst;
int u8c;
WCHAR ucs2buf[3];
int ucs2len;
int codepage = FIO_GET_CP(fio_flags);
int bytelen;
int found_bad;
char replstr[2];
/* /*
* Conversion from an MS-Windows codepage or UTF-8 to UTF-8 or * Conversion from an MS-Windows codepage or UTF-8 to UTF-8 or
* a codepage, using standard MS-Windows functions. * a codepage, using standard MS-Windows functions. This
* 1. find out how many ucs-2 characters there are. * requires two steps:
* 2. convert from 'fileencoding' to ucs-2 * 1. convert from 'fileencoding' to ucs-2
* 3. convert from ucs-2 to 'encoding' * 2. convert from ucs-2 to 'encoding'
*
* Because there may be illegal bytes AND an incomplete byte
* sequence at the end, we may have to do the conversion one
* character at a time to get it right.
*/ */
char_u *ucsp;
size_t from_size = size; /* Replacement string for WideCharToMultiByte(). */
int needed; if (bad_char_behavior > 0)
char_u *p; replstr[0] = bad_char_behavior;
int u8c; else
replstr[0] = '?';
replstr[1] = NUL;
/* /*
* 1. find out how many ucs-2 characters there are. * Move the bytes to the end of the buffer, so that we have
* room to put the result at the start.
*/ */
# ifdef CP_UTF8 /* VC 4.1 doesn't define CP_UTF8 */ src = ptr + real_size - size;
if (FIO_GET_CP(fio_flags) == CP_UTF8) mch_memmove(src, ptr, size);
{
int l, flen;
/* Handle CP_UTF8 ourselves to be able to handle trailing /*
* bytes properly. First find out the number of * Do the conversion.
* characters and check for trailing bytes. */ */
needed = 0; dst = ptr;
p = ptr; size = size;
for (flen = from_size; flen > 0; flen -= l) while (size > 0)
{ {
l = utf_ptr2len_len(p, flen); found_bad = FALSE;
if (l > flen) /* incomplete char */
# ifdef CP_UTF8 /* VC 4.1 doesn't define CP_UTF8 */
if (codepage == CP_UTF8)
{ {
if (l > CONV_RESTLEN) /* Handle CP_UTF8 input ourselves to be able to handle
* trailing bytes properly.
* Get one UTF-8 character from src. */
bytelen = utf_ptr2len_len(src, size);
if (bytelen > size)
{
/* Only got some bytes of a character. Normally
* it's put in "conv_rest", but if it's too long
* deal with it as if they were illegal bytes. */
if (bytelen <= CONV_RESTLEN)
break;
/* weird overlong byte sequence */ /* weird overlong byte sequence */
goto rewind_retry; bytelen = size;
mch_memmove(conv_rest, p, flen); found_bad = TRUE;
conv_restlen = flen; }
from_size -= flen; else
{
u8c = utf_ptr2char(src);
if (u8c > 0xffff)
found_bad = TRUE;
ucs2buf[0] = u8c;
ucs2len = 1;
}
}
else
# endif
{
/* We don't know how long the byte sequence is, try
* from one to three bytes. */
for (bytelen = 1; bytelen <= size && bytelen <= 3;
++bytelen)
{
ucs2len = MultiByteToWideChar(codepage,
MB_ERR_INVALID_CHARS,
(LPCSTR)src, bytelen,
ucs2buf, 3);
if (ucs2len > 0)
break; break;
} }
if (l == 1 && *p >= 0x80) /* illegal byte */ if (ucs2len == 0)
goto rewind_retry;
++needed;
p += l;
}
}
else
# endif
{ {
/* We can't tell if the last byte of an MBCS string is /* If we have only one byte then it's probably an
* valid and MultiByteToWideChar() returns zero if it * incomplete byte sequence. Otherwise discard
* isn't. Try the whole string, and if that fails, bump * one byte as a bad character. */
* the last byte into conv_rest and try again. */ if (size == 1)
needed = MultiByteToWideChar(FIO_GET_CP(fio_flags), break;
MB_ERR_INVALID_CHARS, (LPCSTR)ptr, from_size, found_bad = TRUE;
NULL, 0); bytelen = 1;
if (needed == 0) }
}
if (!found_bad)
{ {
conv_rest[0] = ptr[from_size - 1]; int i;
conv_restlen = 1;
--from_size;
needed = MultiByteToWideChar(FIO_GET_CP(fio_flags),
MB_ERR_INVALID_CHARS, (LPCSTR)ptr, from_size,
NULL, 0);
}
/* If there really is a conversion error, try using another /* Convert "ucs2buf[ucs2len]" to 'enc' in "dst". */
* conversion. */
if (needed == 0)
goto rewind_retry;
}
/*
* 2. convert from 'fileencoding' to ucs-2
*
* Put the result of conversion to UCS-2 at the end of the
* buffer, then convert from UCS-2 to UTF-8 or "enc_codepage"
* into the start of the buffer. If there is not enough space
* just fail, there is probably something wrong.
*/
ucsp = ptr + real_size - (needed * sizeof(WCHAR));
if (ucsp < ptr + size)
goto rewind_retry;
# ifdef CP_UTF8 /* VC 4.1 doesn't define CP_UTF8 */
if (FIO_GET_CP(fio_flags) == CP_UTF8)
{
int l, flen;
/* Convert from utf-8 to ucs-2. */
needed = 0;
p = ptr;
for (flen = from_size; flen > 0; flen -= l)
{
l = utf_ptr2len_len(p, flen);
u8c = utf_ptr2char(p);
ucsp[needed * 2] = (u8c & 0xff);
ucsp[needed * 2 + 1] = (u8c >> 8);
++needed;
p += l;
}
}
else
# endif
needed = MultiByteToWideChar(FIO_GET_CP(fio_flags),
MB_ERR_INVALID_CHARS, (LPCSTR)ptr,
from_size, (LPWSTR)ucsp, needed);
/*
* 3. convert from ucs-2 to 'encoding'
*/
if (enc_utf8) if (enc_utf8)
{ {
/* From UCS-2 to UTF-8. Cannot fail. */ /* From UCS-2 to UTF-8. Cannot fail. */
p = ptr; for (i = 0; i < ucs2len; ++i)
for (; needed > 0; --needed) dst += utf_char2bytes(ucs2buf[i], dst);
{
u8c = *ucsp++;
u8c += (*ucsp++ << 8);
p += utf_char2bytes(u8c, p);
}
size = p - ptr;
} }
else else
{ {
BOOL bad = FALSE; BOOL bad = FALSE;
int dstlen;
/* From UCS-2 to "enc_codepage". If the conversion uses /* From UCS-2 to "enc_codepage". If the
* the default character "?", the data doesn't fit in this * conversion uses the default character "?",
* encoding, so fail (unless forced). */ * the data doesn't fit in this encoding. */
size = WideCharToMultiByte(enc_codepage, 0, dstlen = WideCharToMultiByte(enc_codepage, 0,
(LPCWSTR)ucsp, needed, (LPCWSTR)ucs2buf, ucs2len,
(LPSTR)ptr, real_size, "?", &bad); (LPSTR)dst, (src - dst),
if (bad && !keep_dest_enc) replstr, &bad);
goto rewind_retry; if (bad)
found_bad = TRUE;
else
dst += dstlen;
} }
} }
if (found_bad)
{
/* Deal with bytes we can't convert. */
if (can_retry)
goto rewind_retry;
if (conv_error == 0)
conv_error = readfile_linenr(linecnt, ptr, dst);
if (bad_char_behavior != BAD_DROP)
{
if (bad_char_behavior == BAD_KEEP)
{
mch_memmove(dst, src, bytelen);
dst += bytelen;
}
else
*dst++ = bad_char_behavior;
}
}
src += bytelen;
size -= bytelen;
}
if (size > 0)
{
/* An incomplete byte sequence remaining. */
mch_memmove(conv_rest, src, size);
conv_restlen = size;
}
/* The new size is equal to how much "dst" was advanced. */
size = dst - ptr;
}
else else
# endif # endif
# ifdef MACOS_X # ifdef MACOS_X
@ -1628,7 +1694,13 @@ retry:
/* Missing leading word. */ /* Missing leading word. */
if (can_retry) if (can_retry)
goto rewind_retry; goto rewind_retry;
conv_error = TRUE; if (conv_error == 0)
conv_error = readfile_linenr(linecnt,
ptr, p);
if (bad_char_behavior == BAD_DROP)
continue;
if (bad_char_behavior != BAD_KEEP)
u8c = bad_char_behavior;
} }
/* found second word of double-word, get the first /* found second word of double-word, get the first
@ -1643,15 +1715,22 @@ retry:
u16c = *--p; u16c = *--p;
u16c += (*--p << 8); u16c += (*--p << 8);
} }
u8c = 0x10000 + ((u16c & 0x3ff) << 10)
+ (u8c & 0x3ff);
/* Check if the word is indeed a leading word. */ /* Check if the word is indeed a leading word. */
if (u16c < 0xd800 || u16c > 0xdbff) if (u16c < 0xd800 || u16c > 0xdbff)
{ {
if (can_retry) if (can_retry)
goto rewind_retry; goto rewind_retry;
conv_error = TRUE; if (conv_error == 0)
conv_error = readfile_linenr(linecnt,
ptr, p);
if (bad_char_behavior == BAD_DROP)
continue;
if (bad_char_behavior != BAD_KEEP)
u8c = bad_char_behavior;
} }
u8c = 0x10000 + ((u16c & 0x3ff) << 10)
+ (u8c & 0x3ff);
} }
} }
else if (fio_flags & FIO_UCS4) else if (fio_flags & FIO_UCS4)
@ -1678,6 +1757,8 @@ retry:
else else
{ {
len = utf_head_off(ptr, p); len = utf_head_off(ptr, p);
p -= len;
u8c = utf_ptr2char(p);
if (len == 0) if (len == 0)
{ {
/* Not a valid UTF-8 character, retry with /* Not a valid UTF-8 character, retry with
@ -1685,10 +1766,14 @@ retry:
* report the error. */ * report the error. */
if (can_retry) if (can_retry)
goto rewind_retry; goto rewind_retry;
conv_error = TRUE; if (conv_error == 0)
conv_error = readfile_linenr(linecnt,
ptr, p);
if (bad_char_behavior == BAD_DROP)
continue;
if (bad_char_behavior != BAD_KEEP)
u8c = bad_char_behavior;
} }
p -= len;
u8c = utf_ptr2char(p);
} }
} }
if (enc_utf8) /* produce UTF-8 */ if (enc_utf8) /* produce UTF-8 */
@ -1704,10 +1789,18 @@ retry:
/* character doesn't fit in latin1, retry with /* character doesn't fit in latin1, retry with
* another fenc when possible, otherwise just * another fenc when possible, otherwise just
* report the error. */ * report the error. */
if (can_retry && !keep_dest_enc) if (can_retry)
goto rewind_retry; goto rewind_retry;
if (conv_error == 0)
conv_error = readfile_linenr(linecnt, ptr, p);
if (bad_char_behavior == BAD_DROP)
++dest;
else if (bad_char_behavior == BAD_KEEP)
*dest = u8c;
else if (eap != NULL && eap->bad_char != 0)
*dest = bad_char_behavior;
else
*dest = 0xBF; *dest = 0xBF;
conv_error = TRUE;
} }
else else
*dest = u8c; *dest = u8c;
@ -1720,33 +1813,64 @@ retry:
size = (long)((ptr + real_size) - dest); size = (long)((ptr + real_size) - dest);
ptr = dest; ptr = dest;
} }
else if (enc_utf8 && !conv_error && !curbuf->b_p_bin) else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin)
{ {
/* Reading UTF-8: Check if the bytes are valid UTF-8. /* Reading UTF-8: Check if the bytes are valid UTF-8.
* Need to start before "ptr" when part of the character was * Need to start before "ptr" when part of the character was
* read in the previous read() call. */ * read in the previous read() call. */
for (p = ptr - utf_head_off(buffer, ptr); p < ptr + size; ++p) for (p = ptr - utf_head_off(buffer, ptr); ; ++p)
{ {
int todo = (ptr + size) - p;
int l;
if (todo <= 0)
break;
if (*p >= 0x80) if (*p >= 0x80)
{ {
len = utf_ptr2len(p);
/* A length of 1 means it's an illegal byte. Accept /* A length of 1 means it's an illegal byte. Accept
* an incomplete character at the end though, the next * an incomplete character at the end though, the next
* read() will get the next bytes, we'll check it * read() will get the next bytes, we'll check it
* then. */ * then. */
if (len == 1) l = utf_ptr2len_len(p, todo);
if (l > todo)
{ {
p += utf_byte2len(*p) - 1; /* Incomplete byte sequence, the next read()
* should get them and check the bytes. */
p += todo;
break; break;
} }
p += len - 1; if (l == 1)
{
/* Illegal byte. If we can try another encoding
* do that. */
if (can_retry)
break;
/* Remember the first linenr with an illegal byte */
if (illegal_byte == 0)
illegal_byte = readfile_linenr(linecnt, ptr, p);
# ifdef USE_ICONV
/* When we did a conversion report an error. */
if (iconv_fd != (iconv_t)-1 && conv_error == 0)
conv_error = readfile_linenr(linecnt, ptr, p);
# endif
/* Drop, keep or replace the bad byte. */
if (bad_char_behavior == BAD_DROP)
{
mch_memmove(p, p+1, todo - 1);
--p;
--size;
}
else if (bad_char_behavior != BAD_KEEP)
*p = bad_char_behavior;
}
p += l - 1;
} }
} }
if (p < ptr + size) if (p < ptr + size)
{ {
/* Detected a UTF-8 error. */ /* Detected a UTF-8 error. */
if (can_retry)
{
rewind_retry: rewind_retry:
/* Retry reading with another conversion. */ /* Retry reading with another conversion. */
# if defined(FEAT_EVAL) && defined(USE_ICONV) # if defined(FEAT_EVAL) && defined(USE_ICONV)
@ -1760,24 +1884,6 @@ rewind_retry:
file_rewind = TRUE; file_rewind = TRUE;
goto retry; goto retry;
} }
/* There is no alternative fenc, just report the error. */
# ifdef USE_ICONV
if (iconv_fd != (iconv_t)-1)
conv_error = TRUE;
else
# endif
if (illegal_byte == 0) /* Keep the first linenr */
{
char_u *s;
/* Estimate the line number. */
illegal_byte = curbuf->b_ml.ml_line_count - linecnt + 1;
for (s = ptr; s < p; ++s)
if (*s == '\n')
++illegal_byte;
}
}
} }
#endif #endif
@ -2159,9 +2265,10 @@ failed:
} }
#endif #endif
#ifdef FEAT_MBYTE #ifdef FEAT_MBYTE
if (conv_error) if (conv_error != 0)
{ {
STRCAT(IObuff, _("[CONVERSION ERROR]")); sprintf((char *)IObuff + STRLEN(IObuff),
_("[CONVERSION ERROR in line %ld]"), (long)conv_error);
c = TRUE; c = TRUE;
} }
else if (illegal_byte > 0) else if (illegal_byte > 0)
@ -2215,7 +2322,7 @@ failed:
/* with errors writing the file requires ":w!" */ /* with errors writing the file requires ":w!" */
if (newfile && (error if (newfile && (error
#ifdef FEAT_MBYTE #ifdef FEAT_MBYTE
|| conv_error || conv_error != 0
#endif #endif
)) ))
curbuf->b_p_ro = TRUE; curbuf->b_p_ro = TRUE;
@ -2297,6 +2404,30 @@ failed:
return OK; return OK;
} }
#ifdef FEAT_MBYTE
/*
 * Estimate the line number for the position "endp" in the bytes being read.
 * The starting value is the current buffer's line count minus "linecnt",
 * plus one; one more is added for every NL found in the bytes from "p" up
 * to, but not including, "endp".
 * The result is meant for error messages that mention a line number.
 */
static linenr_T
readfile_linenr(linecnt, p, endp)
    linenr_T linecnt; /* line count before reading more bytes */
    char_u *p; /* start of more bytes read */
    char_u *endp; /* end of more bytes read */
{
    linenr_T count = curbuf->b_ml.ml_line_count - linecnt + 1;
    char_u *cur = p;

    /* Every NL seen before "endp" moves the estimate one line further. */
    while (cur < endp)
	if (*cur++ == '\n')
	    ++count;
    return count;
}
#endif
/* /*
* Fill "*eap" to force the 'fileencoding' and 'fileformat' to be equal to the * Fill "*eap" to force the 'fileencoding' and 'fileformat' to be equal to the
* buffer "buf". Used for calling readfile(). * buffer "buf". Used for calling readfile().

View File

@ -6,18 +6,18 @@
* Do ":help credits" in Vim to see a list of people who contributed. * Do ":help credits" in Vim to see a list of people who contributed.
*/ */
/*
* For MSDOS some keys produce codes larger than 0xff. They are split into two
* chars, the first one is K_NUL (same value used in term.h).
*/
#define K_NUL (0xce) /* for MSDOS: special key follows */
/* /*
* Keycode definitions for special keys. * Keycode definitions for special keys.
* *
* Any special key code sequences are replaced by these codes. * Any special key code sequences are replaced by these codes.
*/ */
/*
* For MSDOS some keys produce codes larger than 0xff. They are split into two
* chars, the first one is K_NUL (same value used in term.h).
*/
#define K_NUL (0xce) /* for MSDOS: special key follows */
/* /*
* K_SPECIAL is the first byte of a special key code and is always followed by * K_SPECIAL is the first byte of a special key code and is always followed by
* two bytes. * two bytes.

View File

@ -2110,6 +2110,7 @@ op_colon(oap)
/* /*
* Handle the "gy" operator: call 'operatorfunc'. * Handle the "gy" operator: call 'operatorfunc'.
*/ */
/*ARGSUSED*/
void void
op_function(oap) op_function(oap)
oparg_T *oap; oparg_T *oap;

View File

@ -1923,4 +1923,9 @@ typedef int proftime_T; /* dummy for function prototypes */
#define VGR_GLOBAL 1 #define VGR_GLOBAL 1
#define VGR_NOJUMP 2 #define VGR_NOJUMP 2
/* Values for the "++bad=" argument: what to do with a bad character. */
#define BAD_REPLACE	'?'	/* default: replace it with '?' */
#define BAD_KEEP	(-1)	/* leave it as it is */
#define BAD_DROP	(-2)	/* erase it */
#endif /* VIM__H */ #endif /* VIM__H */