mirror of
https://github.com/rfivet/uemacs.git
synced 2024-12-20 16:22:38 -05:00
When reading file, identify encoding (ASCII, UTF-8, EXTENDED, MIXED).
This commit is contained in:
parent
fbe92a1f52
commit
115d9babae
42
file.c
42
file.c
@ -48,6 +48,13 @@ static const char *eolname[] = {
|
||||
"MIXED"
|
||||
} ;
|
||||
|
||||
/* Display names for the encoding detected while reading a file.
** readin() indexes this table with fcode & (FCODE_MASK - 1), so the entry
** order must follow the low bits of the FCODE_xxx values (0, 1, 2, 3). */
static const char *codename[] = {
|
||||
"ASCII",	/* FCODE_ASCII */
|
||||
"UTF-8",	/* FCODE_UTF_8 */
|
||||
"EXTENDED",	/* FCODE_EXTND */
|
||||
"MIXED"	/* FCODE_MIXED */
|
||||
} ;
|
||||
|
||||
boolean restflag = FALSE ; /* restricted use? */
|
||||
|
||||
boolean resterr( void) {
|
||||
@ -290,7 +297,7 @@ int readin(const char *fname, boolean lockfl)
|
||||
eoltype found_eol ;
|
||||
int nbytes;
|
||||
int nline;
|
||||
char mesg[NSTRING];
|
||||
char *errmsg ;
|
||||
|
||||
#if (FILOCK && BSD) || SVR4
|
||||
if (lockfl && lockchk(fname) == ABORT)
|
||||
@ -370,28 +377,29 @@ int readin(const char *fname, boolean lockfl)
|
||||
break ;
|
||||
default:
|
||||
found_eol = EOL_MIXED ;
|
||||
curbp->b_mode |= MDVIEW ; /* add view mode as we have lost
|
||||
** information */
|
||||
curbp->b_mode |= MDVIEW ; /* force view mode as we have lost
|
||||
** EOL information */
|
||||
}
|
||||
|
||||
ffclose(); /* Ignore errors. */
|
||||
strcpy(mesg, "(");
|
||||
if( fcode == FCODE_UTF_8)
|
||||
curbp->b_mode |= MDUTF8 ;
|
||||
|
||||
if( s == FIOERR) {
|
||||
strcat(mesg, "I/O ERROR, ");
|
||||
errmsg = "I/O ERROR, " ;
|
||||
curbp->b_flag |= BFTRUNC ;
|
||||
}
|
||||
if (s == FIOMEM) {
|
||||
strcat(mesg, "OUT OF MEMORY, ");
|
||||
} else if( s == FIOMEM) {
|
||||
errmsg = "OUT OF MEMORY, " ;
|
||||
curbp->b_flag |= BFTRUNC ;
|
||||
}
|
||||
sprintf(&mesg[strlen(mesg)], "Read %d line", nline);
|
||||
if (nline != 1)
|
||||
strcat(mesg, "s");
|
||||
} else
|
||||
errmsg = "" ;
|
||||
|
||||
strcat( mesg, ", eol = ") ;
|
||||
strcat( mesg, eolname[ found_eol]) ;
|
||||
strcat(mesg, ")");
|
||||
mloutstr( mesg) ;
|
||||
mloutfmt( "(%sRead %d line%s, code/eol: %s/%s)",
|
||||
errmsg,
|
||||
nline,
|
||||
(nline != 1) ? "s" : "",
|
||||
codename[ fcode & (FCODE_MASK -1)],
|
||||
eolname[ found_eol]) ;
|
||||
ffclose(); /* Ignore errors. */
|
||||
|
||||
out:
|
||||
for (wp = wheadp; wp != NULL; wp = wp->w_wndp) {
|
||||
|
24
fileio.c
24
fileio.c
@ -21,6 +21,7 @@
|
||||
|
||||
#include "defines.h"
|
||||
#include "retcode.h"
|
||||
#include "utf8.h"
|
||||
|
||||
#if CRYPT
|
||||
boolean is_crypted ; /* currently encrypting? */
|
||||
@ -29,6 +30,7 @@ boolean is_crypted ; /* currently encrypting? */
|
||||
char *fline = NULL ; /* dynamic return line */
|
||||
int flen = 0 ; /* current allocated length of fline */
|
||||
int ftype ;
|
||||
int fcode ; /* encoding type FCODE_xxxxx */
|
||||
int fpayload ; /* actual length of fline content */
|
||||
|
||||
|
||||
@ -45,6 +47,7 @@ fio_code ffropen( const char *fn)
|
||||
return FIOFNF;
|
||||
eofflag = FALSE;
|
||||
ftype = FTYPE_NONE ;
|
||||
fcode = FCODE_ASCII ;
|
||||
return FIOSUC;
|
||||
}
|
||||
|
||||
@ -79,6 +82,7 @@ fio_code ffclose(void)
|
||||
}
|
||||
eofflag = FALSE;
|
||||
ftype = FTYPE_NONE ;
|
||||
fcode = FCODE_ASCII ;
|
||||
|
||||
#if MSDOS & CTRLZ
|
||||
fputc(26, ffp); /* add a ^Z at the end of the file */
|
||||
@ -136,6 +140,7 @@ fio_code ffgetline(void)
|
||||
{
|
||||
int c; /* current character read */
|
||||
int i; /* current index into fline */
|
||||
int lcode = FCODE_ASCII ; /* line encoding, defaults to ASCII */
|
||||
|
||||
/* if we are at the end...return it */
|
||||
if (eofflag)
|
||||
@ -156,6 +161,7 @@ fio_code ffgetline(void)
|
||||
i = 0;
|
||||
while ((c = fgetc(ffp)) != EOF && c != '\r' && c != '\n') {
|
||||
fline[i++] = c;
|
||||
lcode |= c ;
|
||||
/* if it's longer, get more room */
|
||||
if (i >= flen) {
|
||||
char *tmpline; /* temp storage for expanding line */
|
||||
@ -173,6 +179,24 @@ fio_code ffgetline(void)
|
||||
}
|
||||
|
||||
fpayload = i ;
|
||||
lcode &= FCODE_MASK ;
|
||||
if( lcode && (fcode != FCODE_MIXED)) { /* line contains extended chars */
|
||||
/* Check if consistent UTF-8 encoding */
|
||||
int bytes ;
|
||||
int pos = 0 ;
|
||||
unicode_t uc ;
|
||||
|
||||
while( (pos < i) && (lcode != FCODE_MIXED)) {
|
||||
bytes = utf8_to_unicode( fline, pos, i, &uc) ;
|
||||
pos += bytes ;
|
||||
if( bytes > 1) /* Multi byte UTF-8 sequence */
|
||||
lcode |= FCODE_UTF_8 ;
|
||||
else if( uc > 127) /* Extended ASCII */
|
||||
lcode |= FCODE_EXTND ;
|
||||
}
|
||||
|
||||
fcode |= lcode ;
|
||||
}
|
||||
|
||||
/* test for any errors that may have occurred */
|
||||
if (c == EOF) {
|
||||
|
7
fileio.h
7
fileio.h
@ -18,6 +18,12 @@ typedef enum {
|
||||
#define FTYPE_MAC 4
|
||||
/* FTYPE_MIXED [ 3, 5, 6, 7] */
|
||||
|
||||
/* Encoding type stored in fcode. ffgetline() ORs the per-line result into
** fcode, so the value accumulates over all lines read from the file. */
#define FCODE_ASCII 0		/* initial value: no byte with bit 7 set */
|
||||
#define FCODE_MASK 0x80	/* bit 7: kept from the OR of all bytes of a line */
|
||||
#define FCODE_UTF_8 0x81	/* a multi byte UTF-8 sequence was decoded */
|
||||
#define FCODE_EXTND 0x82	/* a single byte above 127 was found */
|
||||
#define FCODE_MIXED 0x83	/* FCODE_UTF_8 | FCODE_EXTND: both were found */
|
||||
|
||||
#if CRYPT
|
||||
#include "retcode.h"
|
||||
|
||||
@ -27,6 +33,7 @@ extern boolean is_crypted ; /* currently encrypting? */
|
||||
extern char *fline ; /* dynamic return line */
|
||||
extern int flen ; /* current allocated length of fline */
|
||||
extern int ftype ;
|
||||
extern int fcode ; /* encoding type */
|
||||
extern int fpayload ; /* actual length of fline content */
|
||||
|
||||
fio_code ffclose( void) ;
|
||||
|
Loading…
Reference in New Issue
Block a user