From 40b825eadd9d6b9ff42654b9b73a6f6516765e08 Mon Sep 17 00:00:00 2001 From: Witold Filipczyk Date: Tue, 16 May 2023 20:08:36 +0200 Subject: [PATCH] [document] pass charset to document_parse_text --- src/document/libdom/doc.c | 8 +++++--- src/document/libdom/doc.h | 2 +- src/ecmascript/libdom/mujs/implementation.c | 2 +- src/ecmascript/libdom/quickjs/implementation.c | 2 +- src/ecmascript/libdom/spidermonkey/implementation.cpp | 2 +- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/document/libdom/doc.c b/src/document/libdom/doc.c index 4db70467..8d759d53 100644 --- a/src/document/libdom/doc.c +++ b/src/document/libdom/doc.c @@ -17,17 +17,18 @@ #include "cache/cache.h" #include "document/document.h" #include "document/libdom/doc.h" +#include "intl/charsets.h" #include "util/string.h" void * -document_parse_text(char *data, size_t length) +document_parse_text(const char *charset, char *data, size_t length) { dom_hubbub_parser *parser = NULL; dom_hubbub_error error; dom_hubbub_parser_params params; dom_document *doc; - params.enc = NULL; + params.enc = charset; params.fix_enc = true; params.enable_script = false; params.msg = NULL; @@ -72,12 +73,13 @@ document_parse(struct document *document) #endif struct cache_entry *cached = document->cached; struct fragment *f = get_cache_fragment(cached); + const char *charset = document->cp >= 0 ? get_cp_mime_name(document->cp) : ""; if (!f || !f->length) { return NULL; } - return document_parse_text(f->data, f->length); + return document_parse_text(charset, f->data, f->length); } void diff --git a/src/document/libdom/doc.h b/src/document/libdom/doc.h index f1347b51..c9635ba2 100644 --- a/src/document/libdom/doc.h +++ b/src/document/libdom/doc.h @@ -8,7 +8,7 @@ extern "C" { struct document; struct string; -void *document_parse_text(char *data, size_t length); +void *document_parse_text(const char *charset, char *data, size_t length); void *document_parse(struct document *document); void free_document(void *doc); diff --git a/src/ecmascript/libdom/mujs/implementation.c b/src/ecmascript/libdom/mujs/implementation.c index 6671ac2e..af1c7de1 100644 --- a/src/ecmascript/libdom/mujs/implementation.c +++ b/src/ecmascript/libdom/mujs/implementation.c @@ -40,7 +40,7 @@ mjs_implementation_createHTMLDocument(js_State *J) add_html_to_string(&str, title, strlen(title)); add_to_string(&str, ""); - void *docu = document_parse_text(str.source, str.length); + void *docu = document_parse_text("utf-8", str.source, str.length); done_string(&str); mjs_push_document(J, docu); } diff --git a/src/ecmascript/libdom/quickjs/implementation.c b/src/ecmascript/libdom/quickjs/implementation.c index 2d4be2ce..5b01c523 100644 --- a/src/ecmascript/libdom/quickjs/implementation.c +++ b/src/ecmascript/libdom/quickjs/implementation.c @@ -58,7 +58,7 @@ js_implementation_createHTMLDocument(JSContext *ctx, JSValueConst this_val, int add_html_to_string(&str, title, len); add_to_string(&str, ""); - void *docu = document_parse_text(str.source, str.length); + void *docu = document_parse_text("utf-8", str.source, str.length); done_string(&str); JS_FreeCString(ctx, title); diff --git a/src/ecmascript/libdom/spidermonkey/implementation.cpp b/src/ecmascript/libdom/spidermonkey/implementation.cpp index 522efe45..18f7ff38 100644 --- a/src/ecmascript/libdom/spidermonkey/implementation.cpp +++ b/src/ecmascript/libdom/spidermonkey/implementation.cpp @@ -78,7 +78,7 @@ implementation_createHTMLDocument(JSContext *ctx, unsigned int argc, JS::Value * add_html_to_string(&str, title, strlen(title)); add_to_string(&str, ""); - void *docu = document_parse_text(str.source, str.length); + void *docu = document_parse_text("utf-8", str.source, str.length); done_string(&str); mem_free(title);