1
0
mirror of https://github.com/rfivet/uemacs.git synced 2024-12-18 23:36:23 -05:00

When reading file, identify encoding (ASCII, UTF-8, EXTENDED, MIXED).

This commit is contained in:
Renaud 2015-02-15 13:30:54 +08:00
parent fbe92a1f52
commit 115d9babae
3 changed files with 59 additions and 20 deletions

48
file.c
View File

@ -48,6 +48,13 @@ static const char *eolname[] = {
"MIXED"
} ;
/* Display names for the encoding detected while reading a file.
** readin() indexes this table with fcode & (FCODE_MASK - 1), so each slot
** number is the low bits of the matching FCODE_xxxxx value. */
static const char *codename[] = {
	[ 0] = "ASCII",		/* FCODE_ASCII */
	[ 1] = "UTF-8",		/* FCODE_UTF_8 */
	[ 2] = "EXTENDED",	/* FCODE_EXTND */
	[ 3] = "MIXED"		/* FCODE_MIXED */
} ;
boolean restflag = FALSE ; /* restricted use? */
boolean resterr( void) {
@ -290,7 +297,7 @@ int readin(const char *fname, boolean lockfl)
eoltype found_eol ;
int nbytes;
int nline;
char mesg[NSTRING];
char *errmsg ;
#if (FILOCK && BSD) || SVR4
if (lockfl && lockchk(fname) == ABORT)
@ -370,28 +377,29 @@ int readin(const char *fname, boolean lockfl)
break ;
default:
found_eol = EOL_MIXED ;
curbp->b_mode |= MDVIEW ; /* add view mode as we have lost
** information */
curbp->b_mode |= MDVIEW ; /* force view mode as we have lost
** EOL information */
}
ffclose(); /* Ignore errors. */
strcpy(mesg, "(");
if (s == FIOERR) {
strcat(mesg, "I/O ERROR, ");
curbp->b_flag |= BFTRUNC;
}
if (s == FIOMEM) {
strcat(mesg, "OUT OF MEMORY, ");
curbp->b_flag |= BFTRUNC;
}
sprintf(&mesg[strlen(mesg)], "Read %d line", nline);
if (nline != 1)
strcat(mesg, "s");
if( fcode == FCODE_UTF_8)
curbp->b_mode |= MDUTF8 ;
strcat( mesg, ", eol = ") ;
strcat( mesg, eolname[ found_eol]) ;
strcat(mesg, ")");
mloutstr( mesg) ;
if( s == FIOERR) {
errmsg = "I/O ERROR, " ;
curbp->b_flag |= BFTRUNC ;
} else if( s == FIOMEM) {
errmsg = "OUT OF MEMORY, " ;
curbp->b_flag |= BFTRUNC ;
} else
errmsg = "" ;
mloutfmt( "(%sRead %d line%s, code/eol: %s/%s)",
errmsg,
nline,
(nline != 1) ? "s" : "",
codename[ fcode & (FCODE_MASK -1)],
eolname[ found_eol]) ;
ffclose(); /* Ignore errors. */
out:
for (wp = wheadp; wp != NULL; wp = wp->w_wndp) {

View File

@ -21,6 +21,7 @@
#include "defines.h"
#include "retcode.h"
#include "utf8.h"
#if CRYPT
boolean is_crypted ; /* currently encrypting? */
@ -29,6 +30,7 @@ boolean is_crypted ; /* currently encrypting? */
char *fline = NULL ; /* dynamic return line */
int flen = 0 ; /* current allocated length of fline */
int ftype ;
int fcode ; /* encoding type FCODE_xxxxx */
int fpayload ; /* actual length of fline content */
@ -45,6 +47,7 @@ fio_code ffropen( const char *fn)
return FIOFNF;
eofflag = FALSE;
ftype = FTYPE_NONE ;
fcode = FCODE_ASCII ;
return FIOSUC;
}
@ -79,6 +82,7 @@ fio_code ffclose(void)
}
eofflag = FALSE;
ftype = FTYPE_NONE ;
fcode = FCODE_ASCII ;
#if MSDOS & CTRLZ
fputc(26, ffp); /* add a ^Z at the end of the file */
@ -136,6 +140,7 @@ fio_code ffgetline(void)
{
int c; /* current character read */
int i; /* current index into fline */
int lcode = FCODE_ASCII ; /* line encoding, defaults to ASCII */
/* if we are at the end...return it */
if (eofflag)
@ -156,6 +161,7 @@ fio_code ffgetline(void)
i = 0;
while ((c = fgetc(ffp)) != EOF && c != '\r' && c != '\n') {
fline[i++] = c;
lcode |= c ;
/* if it's longer, get more room */
if (i >= flen) {
char *tmpline; /* temp storage for expanding line */
@ -173,6 +179,24 @@ fio_code ffgetline(void)
}
fpayload = i ;
lcode &= FCODE_MASK ;
if( lcode && (fcode != FCODE_MIXED)) { /* line contains extended chars */
/* Check if consistent UTF-8 encoding */
int bytes ;
int pos = 0 ;
unicode_t uc ;
while( (pos < i) && (lcode != FCODE_MIXED)) {
bytes = utf8_to_unicode( fline, pos, i, &uc) ;
pos += bytes ;
if( bytes > 1) /* Multi byte UTF-8 sequence */
lcode |= FCODE_UTF_8 ;
else if( uc > 127) /* Extended ASCII */
lcode |= FCODE_EXTND ;
}
fcode |= lcode ;
}
/* test for any errors that may have occurred */
if (c == EOF) {

View File

@ -18,6 +18,12 @@ typedef enum {
#define FTYPE_MAC 4
/* FTYPE_MIXED [ 3, 5, 6, 7] */
/* Encoding classification accumulated in fcode while lines are read.
** FCODE_MASK isolates the high bit of the bytes seen on a line; the low
** bits record which kind(s) of extended content were found, and are also
** what is left once the FCODE_MASK bit is stripped to index a name table. */
#define FCODE_ASCII 0 /* initial value: no extended content recorded */
#define FCODE_MASK 0x80 /* high bit: line holds at least one byte > 127 */
#define FCODE_UTF_8 0x81 /* multi-byte UTF-8 sequence(s) found */
#define FCODE_EXTND 0x82 /* single-byte value(s) > 127 found */
#define FCODE_MIXED 0x83 /* FCODE_UTF_8 | FCODE_EXTND: both kinds found */
#if CRYPT
#include "retcode.h"
@ -27,6 +33,7 @@ extern boolean is_crypted ; /* currently encrypting? */
extern char *fline ; /* dynamic return line */
extern int flen ; /* current allocated length of fline */
extern int ftype ;
extern int fcode ; /* encoding type */
extern int fpayload ; /* actual length of fline content */
fio_code ffclose( void) ;