1
0
mirror of https://github.com/rfivet/uemacs.git synced 2024-12-21 00:26:27 -05:00

When reading file, identify encoding (ASCII, UTF-8, EXTENDED, MIXED).

This commit is contained in:
Renaud 2015-02-15 13:30:54 +08:00
parent fbe92a1f52
commit 115d9babae
3 changed files with 59 additions and 20 deletions

42
file.c
View File

@ -48,6 +48,13 @@ static const char *eolname[] = {
"MIXED" "MIXED"
} ; } ;
/* Display names of the detected file encoding, reported by readin() in
** its "code/eol" status message.
** The table is indexed with fcode & (FCODE_MASK - 1), i.e. the low bits
** of the FCODE_xxx value, so the entries must stay in this order:
** 0 = FCODE_ASCII, 1 = FCODE_UTF_8, 2 = FCODE_EXTND, 3 = FCODE_MIXED.
*/
static const char *codename[] = {
"ASCII",
"UTF-8",
"EXTENDED",
"MIXED"
} ;
boolean restflag = FALSE ; /* restricted use? */ boolean restflag = FALSE ; /* restricted use? */
boolean resterr( void) { boolean resterr( void) {
@ -290,7 +297,7 @@ int readin(const char *fname, boolean lockfl)
eoltype found_eol ; eoltype found_eol ;
int nbytes; int nbytes;
int nline; int nline;
char mesg[NSTRING]; char *errmsg ;
#if (FILOCK && BSD) || SVR4 #if (FILOCK && BSD) || SVR4
if (lockfl && lockchk(fname) == ABORT) if (lockfl && lockchk(fname) == ABORT)
@ -370,28 +377,29 @@ int readin(const char *fname, boolean lockfl)
break ; break ;
default: default:
found_eol = EOL_MIXED ; found_eol = EOL_MIXED ;
curbp->b_mode |= MDVIEW ; /* add view mode as we have lost curbp->b_mode |= MDVIEW ; /* force view mode as we have lost
** information */ ** EOL information */
} }
ffclose(); /* Ignore errors. */ if( fcode == FCODE_UTF_8)
strcpy(mesg, "("); curbp->b_mode |= MDUTF8 ;
if( s == FIOERR) { if( s == FIOERR) {
strcat(mesg, "I/O ERROR, "); errmsg = "I/O ERROR, " ;
curbp->b_flag |= BFTRUNC ; curbp->b_flag |= BFTRUNC ;
} } else if( s == FIOMEM) {
if (s == FIOMEM) { errmsg = "OUT OF MEMORY, " ;
strcat(mesg, "OUT OF MEMORY, ");
curbp->b_flag |= BFTRUNC ; curbp->b_flag |= BFTRUNC ;
} } else
sprintf(&mesg[strlen(mesg)], "Read %d line", nline); errmsg = "" ;
if (nline != 1)
strcat(mesg, "s");
strcat( mesg, ", eol = ") ; mloutfmt( "(%sRead %d line%s, code/eol: %s/%s)",
strcat( mesg, eolname[ found_eol]) ; errmsg,
strcat(mesg, ")"); nline,
mloutstr( mesg) ; (nline != 1) ? "s" : "",
codename[ fcode & (FCODE_MASK -1)],
eolname[ found_eol]) ;
ffclose(); /* Ignore errors. */
out: out:
for (wp = wheadp; wp != NULL; wp = wp->w_wndp) { for (wp = wheadp; wp != NULL; wp = wp->w_wndp) {

View File

@ -21,6 +21,7 @@
#include "defines.h" #include "defines.h"
#include "retcode.h" #include "retcode.h"
#include "utf8.h"
#if CRYPT #if CRYPT
boolean is_crypted ; /* currently encrypting? */ boolean is_crypted ; /* currently encrypting? */
@ -29,6 +30,7 @@ boolean is_crypted ; /* currently encrypting? */
char *fline = NULL ; /* dynamic return line */ char *fline = NULL ; /* dynamic return line */
int flen = 0 ; /* current allocated length of fline */ int flen = 0 ; /* current allocated length of fline */
int ftype ; int ftype ;
int fcode ; /* encoding type FCODE_xxxxx */
int fpayload ; /* actual length of fline content */ int fpayload ; /* actual length of fline content */
@ -45,6 +47,7 @@ fio_code ffropen( const char *fn)
return FIOFNF; return FIOFNF;
eofflag = FALSE; eofflag = FALSE;
ftype = FTYPE_NONE ; ftype = FTYPE_NONE ;
fcode = FCODE_ASCII ;
return FIOSUC; return FIOSUC;
} }
@ -79,6 +82,7 @@ fio_code ffclose(void)
} }
eofflag = FALSE; eofflag = FALSE;
ftype = FTYPE_NONE ; ftype = FTYPE_NONE ;
fcode = FCODE_ASCII ;
#if MSDOS & CTRLZ #if MSDOS & CTRLZ
fputc(26, ffp); /* add a ^Z at the end of the file */ fputc(26, ffp); /* add a ^Z at the end of the file */
@ -136,6 +140,7 @@ fio_code ffgetline(void)
{ {
int c; /* current character read */ int c; /* current character read */
int i; /* current index into fline */ int i; /* current index into fline */
int lcode = FCODE_ASCII ; /* line encoding, defaults to ASCII */
/* if we are at the end...return it */ /* if we are at the end...return it */
if (eofflag) if (eofflag)
@ -156,6 +161,7 @@ fio_code ffgetline(void)
i = 0; i = 0;
while ((c = fgetc(ffp)) != EOF && c != '\r' && c != '\n') { while ((c = fgetc(ffp)) != EOF && c != '\r' && c != '\n') {
fline[i++] = c; fline[i++] = c;
lcode |= c ;
/* if it's longer, get more room */ /* if it's longer, get more room */
if (i >= flen) { if (i >= flen) {
char *tmpline; /* temp storage for expanding line */ char *tmpline; /* temp storage for expanding line */
@ -173,6 +179,24 @@ fio_code ffgetline(void)
} }
fpayload = i ; fpayload = i ;
lcode &= FCODE_MASK ;
if( lcode && (fcode != FCODE_MIXED)) { /* line contains extended chars */
/* Check if consistent UTF-8 encoding */
int bytes ;
int pos = 0 ;
unicode_t uc ;
while( (pos < i) && (lcode != FCODE_MIXED)) {
bytes = utf8_to_unicode( fline, pos, i, &uc) ;
pos += bytes ;
if( bytes > 1) /* Multi byte UTF-8 sequence */
lcode |= FCODE_UTF_8 ;
else if( uc > 127) /* Extended ASCII */
lcode |= FCODE_EXTND ;
}
fcode |= lcode ;
}
/* test for any errors that may have occurred */ /* test for any errors that may have occurred */
if (c == EOF) { if (c == EOF) {

View File

@ -18,6 +18,12 @@ typedef enum {
#define FTYPE_MAC 4 #define FTYPE_MAC 4
/* FTYPE_MIXED [ 3, 5, 6, 7] */ /* FTYPE_MIXED [ 3, 5, 6, 7] */
/* File encoding codes held in fcode while a file is being read.
** ffgetline() ORs every byte of a line together and keeps only the
** FCODE_MASK bit, so a non-zero result means the line contains at least
** one byte above 127. The low bits then record what kind of content
** was found: bit 0 for a multi-byte UTF-8 sequence, bit 1 for a single
** extended byte; both bits set means the two kinds are mixed.
** The codes are combined with |=, so the values must remain bit flags.
*/
#define FCODE_ASCII 0		/* only 7-bit characters seen so far */
#define FCODE_MASK 0x80		/* high bit: some byte > 127 is present */
#define FCODE_UTF_8 0x81	/* FCODE_MASK | multi-byte UTF-8 flag */
#define FCODE_EXTND 0x82	/* FCODE_MASK | extended single byte flag */
#define FCODE_MIXED 0x83	/* both UTF-8 and extended bytes found */
#if CRYPT #if CRYPT
#include "retcode.h" #include "retcode.h"
@ -27,6 +33,7 @@ extern boolean is_crypted ; /* currently encrypting? */
extern char *fline ; /* dynamic return line */ extern char *fline ; /* dynamic return line */
extern int flen ; /* current allocated length of fline */ extern int flen ; /* current allocated length of fline */
extern int ftype ; extern int ftype ;
extern int fcode ; /* encoding type */
extern int fpayload ; /* actual length of fline content */ extern int fpayload ; /* actual length of fline content */
fio_code ffclose( void) ; fio_code ffclose( void) ;