diff --git a/file.c b/file.c index 9e3fadd..8f43b8d 100644 --- a/file.c +++ b/file.c @@ -48,6 +48,13 @@ static const char *eolname[] = { "MIXED" } ; +static const char *codename[] = { + "ASCII", + "UTF-8", + "EXTENDED", + "MIXED" +} ; + boolean restflag = FALSE ; /* restricted use? */ boolean resterr( void) { @@ -290,7 +297,7 @@ int readin(const char *fname, boolean lockfl) eoltype found_eol ; int nbytes; int nline; - char mesg[NSTRING]; + char *errmsg ; #if (FILOCK && BSD) || SVR4 if (lockfl && lockchk(fname) == ABORT) @@ -370,28 +377,29 @@ int readin(const char *fname, boolean lockfl) break ; default: found_eol = EOL_MIXED ; - curbp->b_mode |= MDVIEW ; /* add view mode as we have lost - ** information */ + curbp->b_mode |= MDVIEW ; /* force view mode as we have lost + ** EOL information */ } - ffclose(); /* Ignore errors. */ - strcpy(mesg, "("); - if (s == FIOERR) { - strcat(mesg, "I/O ERROR, "); - curbp->b_flag |= BFTRUNC; - } - if (s == FIOMEM) { - strcat(mesg, "OUT OF MEMORY, "); - curbp->b_flag |= BFTRUNC; - } - sprintf(&mesg[strlen(mesg)], "Read %d line", nline); - if (nline != 1) - strcat(mesg, "s"); + if( fcode == FCODE_UTF_8) + curbp->b_mode |= MDUTF8 ; - strcat( mesg, ", eol = ") ; - strcat( mesg, eolname[ found_eol]) ; - strcat(mesg, ")"); - mloutstr( mesg) ; + if( s == FIOERR) { + errmsg = "I/O ERROR, " ; + curbp->b_flag |= BFTRUNC ; + } else if( s == FIOMEM) { + errmsg = "OUT OF MEMORY, " ; + curbp->b_flag |= BFTRUNC ; + } else + errmsg = "" ; + + mloutfmt( "(%sRead %d line%s, code/eol: %s/%s)", + errmsg, + nline, + (nline != 1) ? "s" : "", + codename[ fcode & (FCODE_MASK -1)], + eolname[ found_eol]) ; + ffclose(); /* Ignore errors. */ out: for (wp = wheadp; wp != NULL; wp = wp->w_wndp) { diff --git a/fileio.c b/fileio.c index 18812c3..991dd29 100644 --- a/fileio.c +++ b/fileio.c @@ -21,6 +21,7 @@ #include "defines.h" #include "retcode.h" +#include "utf8.h" #if CRYPT boolean is_crypted ; /* currently encrypting? */ @@ -29,6 +30,7 @@ boolean is_crypted ; /* currently encrypting? */ char *fline = NULL ; /* dynamic return line */ int flen = 0 ; /* current allocated length of fline */ int ftype ; +int fcode ; /* encoding type FCODE_xxxxx */ int fpayload ; /* actual length of fline content */ @@ -45,6 +47,7 @@ fio_code ffropen( const char *fn) return FIOFNF; eofflag = FALSE; ftype = FTYPE_NONE ; + fcode = FCODE_ASCII ; return FIOSUC; } @@ -79,6 +82,7 @@ fio_code ffclose(void) } eofflag = FALSE; ftype = FTYPE_NONE ; + fcode = FCODE_ASCII ; #if MSDOS & CTRLZ fputc(26, ffp); /* add a ^Z at the end of the file */ @@ -136,6 +140,7 @@ fio_code ffgetline(void) { int c; /* current character read */ int i; /* current index into fline */ + int lcode = FCODE_ASCII ; /* line encoding, defaults to ASCII */ /* if we are at the end...return it */ if (eofflag) @@ -156,6 +161,7 @@ fio_code ffgetline(void) i = 0; while ((c = fgetc(ffp)) != EOF && c != '\r' && c != '\n') { fline[i++] = c; + lcode |= c ; /* if it's longer, get more room */ if (i >= flen) { char *tmpline; /* temp storage for expanding line */ @@ -173,6 +179,24 @@ fio_code ffgetline(void) } fpayload = i ; + lcode &= FCODE_MASK ; + if( lcode && (fcode != FCODE_MIXED)) { /* line contains extended chars */ + /* Check if consistent UTF-8 encoding */ + int bytes ; + int pos = 0 ; + unicode_t uc ; + + while( (pos < i) && (lcode != FCODE_MIXED)) { + bytes = utf8_to_unicode( fline, pos, i, &uc) ; + pos += bytes ; + if( bytes > 1) /* Multi byte UTF-8 sequence */ + lcode |= FCODE_UTF_8 ; + else if( uc > 127) /* Extended ASCII */ + lcode |= FCODE_EXTND ; + } + + fcode |= lcode ; + } /* test for any errors that may have occured */ if (c == EOF) { diff --git a/fileio.h b/fileio.h index 3ee7ff2..a8d6770 100644 --- a/fileio.h +++ b/fileio.h @@ -18,6 +18,12 @@ typedef enum { #define FTYPE_MAC 4 /* FTYPE_MIXED [ 3, 5, 6, 7] */ +#define FCODE_ASCII 0 +#define FCODE_MASK 0x80 +#define FCODE_UTF_8 0x81 +#define FCODE_EXTND 0x82 +#define FCODE_MIXED 0x83 + #if CRYPT #include "retcode.h" @@ -27,6 +33,7 @@ extern boolean is_crypted ; /* currently encrypting? */ extern char *fline ; /* dynamic return line */ extern int flen ; /* current allocated length of fline */ extern int ftype ; +extern int fcode ; /* encoding type */ extern int fpayload ; /* actual length of fline content */ fio_code ffclose( void) ;