mirror of
https://github.com/rkd77/elinks.git
synced 2025-01-03 14:57:44 -05:00
[python] If page encoding is not "utf-8", encode it twice in pre_format_html_hook.
Once to UTF-8 before pre_format_html_hook is called, and a second time back to the original encoding after the Python script has run. I know it is inefficient, but computers are quite fast nowadays.
This commit is contained in:
parent
bec41b6e2f
commit
8b8f57ed75
@ -7,6 +7,7 @@
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
|
||||
#include <iconv.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
|
||||
@ -14,6 +15,7 @@
|
||||
|
||||
#include "cache/cache.h"
|
||||
#include "main/event.h"
|
||||
#include "protocol/header.h"
|
||||
#include "protocol/uri.h"
|
||||
#include "scripting/python/core.h"
|
||||
#include "session/session.h"
|
||||
@ -79,6 +81,60 @@ script_hook_url(va_list ap, void *data)
|
||||
return EVENT_HOOK_STATUS_NEXT;
|
||||
}
|
||||
|
||||
/* Work out which codepage a cached document is encoded in, based on its
 * protocol header @head.
 *
 * Lookup order:
 *  1. the "charset" parameter of each "Content-Type" header, in order;
 *  2. the "Content-Charset" header;
 *  3. the "Charset" header;
 *  4. the "System" codepage, also used when @head is NULL.
 *
 * A candidate is accepted as soon as get_cp_index() yields something
 * other than -1.  Returns the resulting codepage index. */
static int
get_codepage(unsigned char *head)
{
	int codepage = -1;

	if (head) {
		unsigned char *cursor = head;
		unsigned char *value;

		/* scan_http_equiv() appends the meta http-equiv directives
		 * to the protocol header before this function is called.
		 * The HTTP Content-Type header takes precedence, so it is
		 * used when present and the meta header only serves as a
		 * fallback.  See bug 983. */
		while (codepage == -1
		       && (value = parse_header(cursor, "Content-Type", &cursor))) {
			unsigned char *charset;

			parse_header_param(value, "charset", &charset, 0);
			if (charset) {
				codepage = get_cp_index(charset);
				mem_free(charset);
			}
			mem_free(value);
		}

		if (codepage == -1) {
			value = parse_header(head, "Content-Charset", NULL);
			if (value) {
				codepage = get_cp_index(value);
				mem_free(value);
			}
		}

		if (codepage == -1) {
			value = parse_header(head, "Charset", NULL);
			if (value) {
				codepage = get_cp_index(value);
				mem_free(value);
			}
		}
	}

	/* Nothing usable in the header (or no header at all). */
	if (codepage == -1) {
		codepage = get_cp_index("System");
	}

	return codepage;
}
|
||||
|
||||
/* Call a Python hook for a pre-format-html event. */
|
||||
|
||||
static enum evhook_status
|
||||
@ -88,9 +144,10 @@ script_hook_pre_format_html(va_list ap, void *data)
|
||||
struct cache_entry *cached = va_arg(ap, struct cache_entry *);
|
||||
struct fragment *fragment = get_cache_fragment(cached);
|
||||
unsigned char *url = struri(cached->uri);
|
||||
int codepage = get_codepage(cached->head);
|
||||
char *method = "pre_format_html_hook";
|
||||
struct session *saved_python_ses = python_ses;
|
||||
PyObject *result;
|
||||
PyObject *result = NULL;
|
||||
int success = 0;
|
||||
|
||||
evhook_use_params(ses && cached);
|
||||
@ -101,8 +158,39 @@ script_hook_pre_format_html(va_list ap, void *data)
|
||||
|
||||
python_ses = ses;
|
||||
|
||||
result = PyObject_CallMethod(python_hooks, method, "ss#", url,
|
||||
fragment->data, fragment->length);
|
||||
if (!is_cp_utf8(codepage)) {
|
||||
size_t iconv_res;
|
||||
size_t ileft;
|
||||
size_t oleft;
|
||||
char *inbuf, *outbuf;
|
||||
char *utf8_data = mem_alloc(fragment->length * 8);
|
||||
iconv_t cd;
|
||||
|
||||
if (!utf8_data) {
|
||||
goto error;
|
||||
}
|
||||
cd = iconv_open("utf-8", get_cp_mime_name(codepage));
|
||||
if (cd == (iconv_t)-1) {
|
||||
mem_free(utf8_data);
|
||||
goto error;
|
||||
}
|
||||
inbuf = fragment->data;
|
||||
outbuf = utf8_data;
|
||||
ileft = fragment->length;
|
||||
oleft = fragment->length * 8;
|
||||
iconv_res = iconv(cd, &inbuf, &ileft, &outbuf, &oleft);
|
||||
|
||||
if (iconv_res == -1) {
|
||||
mem_free(utf8_data);
|
||||
goto error;
|
||||
}
|
||||
iconv_close(cd);
|
||||
|
||||
result = PyObject_CallMethod(python_hooks, method, "ss#", url, utf8_data, fragment->length * 8 - oleft);
|
||||
mem_free(utf8_data);
|
||||
} else {
|
||||
result = PyObject_CallMethod(python_hooks, method, "ss#", url, fragment->data, fragment->length);
|
||||
}
|
||||
if (!result) goto error;
|
||||
|
||||
if (result != Py_None) {
|
||||
@ -114,12 +202,43 @@ script_hook_pre_format_html(va_list ap, void *data)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* This assumes the Py_ssize_t len is not too large to
|
||||
* fit in the off_t parameter of normalize_cache_entry().
|
||||
* add_fragment() itself seems to assume the same thing,
|
||||
* and there is no standard OFF_MAX macro against which
|
||||
* ELinks could check the value. */
|
||||
(void) add_fragment(cached, 0, str, len);
|
||||
if (!is_cp_utf8(codepage)) {
|
||||
size_t iconv_res;
|
||||
size_t ileft;
|
||||
size_t oleft;
|
||||
char *inbuf, *outbuf;
|
||||
char *dec_data = mem_alloc(len * 4);
|
||||
iconv_t cd;
|
||||
|
||||
if (!dec_data) {
|
||||
goto error;
|
||||
}
|
||||
cd = iconv_open(get_cp_mime_name(codepage), "utf-8");
|
||||
if (cd == (iconv_t)-1) {
|
||||
mem_free(dec_data);
|
||||
goto error;
|
||||
}
|
||||
inbuf = str;
|
||||
outbuf = dec_data;
|
||||
ileft = len;
|
||||
oleft = len * 4;
|
||||
iconv_res = iconv(cd, &inbuf, &ileft, &outbuf, &oleft);
|
||||
|
||||
if (iconv_res == -1) {
|
||||
mem_free(dec_data);
|
||||
goto error;
|
||||
}
|
||||
iconv_close(cd);
|
||||
(void) add_fragment(cached, 0, dec_data, len * 4 - oleft);
|
||||
mem_free(dec_data);
|
||||
} else {
|
||||
/* This assumes the Py_ssize_t len is not too large to
|
||||
* fit in the off_t parameter of normalize_cache_entry().
|
||||
* add_fragment() itself seems to assume the same thing,
|
||||
* and there is no standard OFF_MAX macro against which
|
||||
* ELinks could check the value. */
|
||||
(void) add_fragment(cached, 0, str, len);
|
||||
}
|
||||
normalize_cache_entry(cached, len);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user