From 78543c55ee51f34c487d1a6712840651306376dd Mon Sep 17 00:00:00 2001
From: Witold Filipczyk
Date: Sun, 24 Sep 2023 17:56:06 +0200
Subject: [PATCH] [table] skip_script

---
Review notes (below the three-dash line, so not part of the commit message):
 - skip_script() returns at the first closing </script> tag instead of
   counting nested <script> tags; script content is raw text and does
   not nest.
 - parse_table() calls skip_script() for an opening <script> tag only.
 - The commit id on the first line and the "index" line below still name
   the original revision of this patch.

 src/document/html/parser/table.c | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/src/document/html/parser/table.c b/src/document/html/parser/table.c
index 9e0701100..d8665d536 100644
--- a/src/document/html/parser/table.c
+++ b/src/document/html/parser/table.c
@@ -44,8 +44,9 @@ add_table_bad_html_start(struct table *table, char *start)
 	    && !table->bad_html[table->bad_html_size - 1].end)
 		return;
 
-	if (realloc_bad_html(&table->bad_html, table->bad_html_size))
+	if (realloc_bad_html(&table->bad_html, table->bad_html_size)) {
 		table->bad_html[table->bad_html_size++].start = start;
+	}
 }
 
 static void
@@ -538,6 +539,42 @@ skip_table(char *html, char *eof)
 	}
 }
 
+/* Skip the content of a SCRIPT element, starting the search at @html.
+ *
+ * Returns the position parse_element() reports as the end of the first
+ * closing </script> tag, or @eof if none is found before the buffer ends.
+ *
+ * Script content is raw text, so SCRIPT elements do not nest.  A "<script"
+ * that merely appears inside the script source (typically in a string
+ * handed to document.write()) must not make us wait for an additional
+ * closing tag, otherwise the rest of the table is swallowed. */
+static char *
+skip_script(char *html, char *eof)
+{
+	while (1) {
+		char *name;
+		int namelen;
+
+		/* Find the next '<' that parse_element() accepts as a tag
+		 * (zero return); on success it also updates @html through
+		 * its last argument, presumably to just past the tag. */
+		while (html < eof
+		       && (*html != '<'
+		           || parse_element(html, eof, &name, &namelen, NULL,
+					    &html)))
+			html++;
+
+		if (html >= eof) return eof;
+
+		if (!namelen) continue;
+
+		/* For a closing tag @name still starts with '/', so the
+		 * slash is matched as part of the name. */
+		if (!c_strlcasecmp(name, namelen, "/SCRIPT", 7))
+			return html;
+	}
+}
+
 struct table *
 parse_table(char *html, char *eof, char **end,
 	    char *attr, int sh, struct html_context *html_context)
@@ -608,6 +645,13 @@ see:
 		closing_tag = 0;
 	}
 
+	/* Only an opening <script> starts a skip; a stray </script> has
+	 * no content to skip. */
+	if (!closing_tag && !c_strlcasecmp(name, namelen, "SCRIPT", 6)) {
+		en = skip_script(en, eof);
+		goto see;
+	}
+
 	if (!c_strlcasecmp(name, namelen, "TABLE", 5)) {
 		if (!closing_tag) {
 			en = skip_table(en, eof);