diff --git a/src/document/html/parser/table.c b/src/document/html/parser/table.c
index b7563b544..1de5b9921 100644
--- a/src/document/html/parser/table.c
+++ b/src/document/html/parser/table.c
@@ -250,7 +250,7 @@ parse_table_attributes(struct table *table, char *attr, int real,
}
-static struct table *
+struct table *
new_table(void)
{
struct table *table = mem_calloc(1, sizeof(*table));
@@ -391,7 +391,7 @@ smart_raise(int target, int base, int unit, int limit)
return base;
}
-static struct table_cell *
+struct table_cell *
new_cell(struct table *table, int dest_col, int dest_row)
{
if (dest_col < table->cols && dest_row < table->rows)
@@ -434,7 +434,7 @@ new_cell(struct table *table, int dest_col, int dest_row)
}
}
-static void
+void
new_columns(struct table *table, int span, int width, int align,
int valign, int group)
{
@@ -464,7 +464,7 @@ new_columns(struct table *table, int span, int width, int align,
}
}
-static void
+void
set_td_width(struct table *table, int col, int width, int force)
{
if (col >= table->cols_x_count) {
diff --git a/src/document/html/parser/table.h b/src/document/html/parser/table.h
index a7b0a10fc..9ee1d4e82 100644
--- a/src/document/html/parser/table.h
+++ b/src/document/html/parser/table.h
@@ -42,11 +42,14 @@ struct part;
struct html_start_end {
char *start, *end;
+ void *start_node, *end_node; /* node-based counterparts of start/end; filled in by src/document/xml/tables.c */
};
struct table_cell {
char *start;
char *end;
+ void *start_node;
+ void *end_node;
char *fragment_id;
color_T bgcolor;
int col, row;
@@ -124,7 +127,11 @@ struct table *
parse_table(char *html, char *eof, char **end,
char *attr, int sh, struct html_context *html_context);
+/* new_table(), new_columns(), new_cell() and set_td_width() are declared here
+ * so that the node-based parser in src/document/xml/tables.c can call them. */
+struct table *new_table(void);
void free_table(struct table *table);
+void new_columns(struct table *table, int span, int width, int align, int valign, int group);
+struct table_cell *new_cell(struct table *table, int dest_col, int dest_row);
+void set_td_width(struct table *table, int col, int width, int force);
#ifdef __cplusplus
}
diff --git a/src/document/xml/tables.c b/src/document/xml/tables.c
index 21818c427..18873a582 100644
--- a/src/document/xml/tables.c
+++ b/src/document/xml/tables.c
@@ -146,6 +146,53 @@ tags_get_align(struct source_renderer *renderer, void *no, int *a)
mem_free(al);
}
+/* Translate the element's "valign" attribute into a VALIGN_* value.
+ * *a is only written when the attribute matches one of the known keywords
+ * (compared with c_strcasecmp()); otherwise it keeps its previous value. */
+static void
+tags_get_valign(struct source_renderer *renderer, void *no, int *a)
+{
+	static const struct {
+		const char *keyword;
+		int valign;
+	} choices[] = {
+		{ "top", VALIGN_TOP },
+		{ "middle", VALIGN_MIDDLE },
+		{ "bottom", VALIGN_BOTTOM },
+		{ "baseline", VALIGN_BASELINE }, /* NOT IMPLEMENTED */
+	};
+	xmlpp::Element *element = no;
+	std::string attr = element->get_attribute_value("valign");
+	char *text = memacpy(attr.c_str(), attr.size());
+	unsigned int idx;
+
+	if (!text) return;
+
+	for (idx = 0; idx < sizeof(choices) / sizeof(choices[0]); idx++) {
+		if (c_strcasecmp(text, choices[idx].keyword)) continue;
+		*a = choices[idx].valign;
+		break;
+	}
+	mem_free(text);
+}
+
+/* Read the element's "width" attribute into *width.
+ * A value ending in '*' is treated as a relative width and stored as
+ * WIDTH_RELATIVE - n, where n is the leading number; any other value is
+ * handed to get_width2(). *width is left alone when nothing valid is found. */
+static void
+tags_get_column_width(struct source_renderer *renderer, void *no, int *width, int sh)
+{
+	struct html_context *html_context = renderer->html_context;
+	xmlpp::Element *element = no;
+	std::string attr = element->get_attribute_value("width");
+	char *text = memacpy(attr.c_str(), attr.size());
+	int length;
+
+	if (!text) return;
+
+	length = strlen(text);
+	if (length == 0 || text[length - 1] != '*') {
+		int absolute = get_width2(text, sh, html_context);
+
+		if (absolute >= 0) *width = absolute;
+	} else {
+		char *rest;
+		int multiplier;
+
+		/* Cut off the '*' so that only the number is left to parse. */
+		text[length - 1] = '\0';
+		errno = 0;
+		multiplier = strtoul(text, &rest, 10);
+		if (errno == 0 && multiplier >= 0 && (*rest == '\0' || *rest == '.'))
+			*width = WIDTH_RELATIVE - multiplier;
+	}
+	mem_free(text);
+}
+
+
int
tags_get_bgcolor(struct source_renderer *renderer, void *no, color_T *rgb)
{
@@ -167,7 +214,7 @@ tags_get_bgcolor(struct source_renderer *renderer, void *no, color_T *rgb)
static void
-tags_parse_table_attributes(struct table *table, struct source_renderer *renderer, void *no, int real)
+tags_parse_table_attributes(struct source_renderer *renderer, struct table *table, void *no, int real)
{
struct html_context *html_context = renderer->html_context;
xmlpp::Element *node = no;
@@ -251,11 +298,527 @@ tags_parse_table_attributes(struct table *table, struct source_renderer *rendere
tags_get_bgcolor(renderer, no, &table->color.background);
}
+/* Make room for the bad_html entry at index @size by asking mem_align_alloc()
+ * to grow the array from @size to @size + 1 elements. Callers test the result
+ * as a boolean before writing the new entry.
+ * NOTE(review): 0xFF is presumably the allocation granularity mask - confirm
+ * against mem_align_alloc(). */
+#define realloc_bad_html(bad_html, size) \
+ mem_align_alloc(bad_html, size, (size) + 1, 0xFF)
+
+/* Open a new bad_html span beginning at node @start.
+ * Nothing happens while a caption is still open (it has a start node but no
+ * end node yet) or while the most recent bad_html span has not been closed.
+ * If the array cannot be grown the span is silently dropped. */
+static void
+tags_add_table_bad_html_start(struct table *table, void *start)
+{
+	const int caption_open = table->caption.start_node
+				 && !table->caption.end_node;
+	const int span_open = table->bad_html_size
+			      && !table->bad_html[table->bad_html_size - 1].end_node;
+
+	if (caption_open || span_open)
+		return;
+
+	if (!realloc_bad_html(&table->bad_html, table->bad_html_size))
+		return;
+
+	table->bad_html[table->bad_html_size].start_node = start;
+	table->bad_html_size++;
+}
+
+/* Close whatever is currently open at node @end: an open caption takes
+ * precedence; otherwise the last bad_html span is closed if it has no end
+ * node yet. With nothing open this is a no-op. */
+static void
+tags_add_table_bad_html_end(struct table *table, void *end)
+{
+	int last;
+
+	if (table->caption.start_node && !table->caption.end_node) {
+		table->caption.end_node = end;
+		return;
+	}
+
+	if (!table->bad_html_size)
+		return;
+
+	last = table->bad_html_size - 1;
+	if (!table->bad_html[last].end_node)
+		table->bad_html[last].end_node = end;
+}
+
+/* Build a struct table from the libxml++ element @no. This is the node-based
+ * counterpart of parse_table(): it recognises the same element names
+ * (TABLE, CAPTION, COLGROUP, COL, THEAD/TBODY/TFOOT, TR, TD, TH) but records
+ * node pointers (start_node/end_node) instead of text pointers.
+ *
+ * @renderer  supplies the html_context and is passed to the tags_get_*() helpers
+ * @no        element to start from; the table's own attributes are read from it
+ * @sh        forwarded to tags_parse_table_attributes() and
+ *            tags_get_column_width()
+ *
+ * Returns the new table, or NULL when the table cannot be allocated or the
+ * cell bookkeeping aborts; on abort the partially built table is freed.
+ *
+ * NOTE(review): the traversal is unfinished. @node only advances in the TABLE
+ * branch (get_next_sibling()), so every other `goto see` examines the same
+ * node again, and a NULL sibling is dereferenced by get_name(). End-of-input
+ * detection is stubbed out with `if (false)`.
+ *
+ * NOTE(review): the memacpy() copies handed to get_num2() (spa, colspa,
+ * rowspa) are never freed in this function; confirm that get_num2() takes
+ * ownership of its argument, otherwise they leak. */
 static struct table *
-tags_parse_table(struct source_renderer *renderer, int t, void *no)
+tags_parse_table(struct source_renderer *renderer, void *no, int sh)
 {
 	struct html_context *html_context = renderer->html_context;
+	xmlpp::Element *node = no;
+	struct table *table;
+	struct table_cell *cell;
+	const char *name;
+	char *l_fragment_id = NULL;
+	color_T last_bgcolor;
+	int namelen;
+	int in_cell = 0;
+	int l_al = ALIGN_LEFT;
+	int l_val = VALIGN_MIDDLE;
+	int colspan, rowspan;
+	int group = 0;
+	int i, j, k;
+	int c_al = ALIGN_TR, c_val = VALIGN_TR, c_width = WIDTH_AUTO, c_span = 0;
+	int cols, rows;
+	int col = 0, row = -1;
+	int maxj;
+	int closing_tag, is_header;
+	unsigned char c;
+	char *colspa = NULL;
+	char *rowspa = NULL;
+	std::string colspan_value;
+	std::string rowspan_value;
+	std::string id_value;
+	std::string name_value;
+
+	table = new_table();
+	if (!table) return NULL;
+
+	tags_parse_table_attributes(renderer, table, no, sh);
+	last_bgcolor = table->color.background;
+
+see:
+	/* Anything met outside a cell is tracked as a bad_html span. */
+	if (!in_cell) {
+		tags_add_table_bad_html_start(table, node);
+	}
+
+	if (false) { /* TODO: end-of-input detection has not been ported yet */
+		if (in_cell) CELL(table, col, row)->end_node = node;
+		tags_add_table_bad_html_end(table, node);
+		goto scan_done;
+	}
+
+	name_value = node->get_name();
+	if (name_value == "") goto see;
+	name = name_value.c_str();
+	namelen = name_value.size();
+
+	/* Nothing sets this to 1 yet, so the closing_tag branches below are
+	 * currently unreachable. */
+	closing_tag = 0;
+
+	if (!c_strlcasecmp(name_value.c_str(), name_value.size(), "TABLE", 5)) {
+		if (!closing_tag) {
+			//en = tags_skip_table(renderer, no);
+			node = node->get_next_sibling();
+			goto see;
+		} else {
+			if (c_span) new_columns(table, c_span, c_width, c_al, c_val, 1);
+			if (in_cell) CELL(table, col, row)->end_node = node;
+
+			tags_add_table_bad_html_end(table, node);
+
+			goto scan_done;
+		}
+	}
+
+	if (!c_strlcasecmp(name_value.c_str(), name_value.size(), "CAPTION", 7)) {
+		if (!closing_tag) {
+			tags_add_table_bad_html_end(table, node);
+			if (!table->caption.start_node)
+				table->caption.start_node = node;
+		} else {
+			if (table->caption.start_node && !table->caption.end_node)
+				table->caption.end_node = node;
+		}
+
+		goto see;
+	}
+
+	if (!c_strlcasecmp(name_value.c_str(), name_value.size(), "COLGROUP", 8)) {
+		/* Flush the columns announced by a previous COLGROUP. */
+		if (c_span) new_columns(table, c_span, c_width, c_al, c_val, 1);
+
+		tags_add_table_bad_html_end(table, node);
+
+		c_al = ALIGN_TR;
+		c_val = VALIGN_TR;
+		c_width = WIDTH_AUTO;
+
+		if (!closing_tag) {
+			tags_get_align(renderer, no, &c_al);
+			tags_get_valign(renderer, no, &c_val);
+			tags_get_column_width(renderer, no, &c_width, sh);
+
+			std::string span_value = node->get_attribute_value("span");
+			char *spa = memacpy(span_value.c_str(), span_value.size());
+
+			c_span = get_num2(spa);
+			if (c_span == -1)
+				c_span = 1;
+			else if (c_span > HTML_MAX_COLSPAN)
+				c_span = HTML_MAX_COLSPAN;
+		} else {
+			c_span = 0;
+		}
+
+		goto see;
+	}
+
+	if (!closing_tag && !c_strlcasecmp(name_value.c_str(), name_value.size(), "COL", 3)) {
+		int sp, width, al, val;
+
+		tags_add_table_bad_html_end(table, node);
+
+		std::string span_value = node->get_attribute_value("span");
+		char *spa = memacpy(span_value.c_str(), span_value.size());
+
+		sp = get_num2(spa);
+		if (sp == -1) sp = 1;
+		else if (sp > HTML_MAX_COLSPAN) sp = HTML_MAX_COLSPAN;
+
+		/* A COL starts from the enclosing COLGROUP's settings. */
+		width = c_width;
+		al = c_al;
+		val = c_val;
+		tags_get_align(renderer, no, &al);
+		tags_get_valign(renderer, no, &val);
+		tags_get_column_width(renderer, no, &width, sh);
+		new_columns(table, sp, width, al, val, !!c_span);
+		c_span = 0;
+		goto see;
+	}
+
+	/* All following tags have T as first letter. */
+	if (c_toupper(name[0]) != 'T') goto see;
+
+	name++; namelen--;
+	if (namelen == 0) goto see;
+
+	c = c_toupper(name[0]);
+
+	/* /TR /TD /TH */
+	if (closing_tag && namelen == 1) {
+		if (c == 'R' || c == 'D' || c == 'H') {
+			if (c_span)
+				new_columns(table, c_span, c_width, c_al, c_val, 1);
+
+			if (in_cell) {
+				CELL(table, col, row)->end_node = node;
+				in_cell = 0;
+			}
+
+			tags_add_table_bad_html_end(table, node);
+			goto see;
+		}
+	}
+
+	/* Beyond that point, opening tags only. */
+	if (closing_tag) goto see;
+
+	/* THEAD TBODY TFOOT */
+	if (namelen == 4
+	    && ((!c_strlcasecmp(name, namelen, "HEAD", 4)) ||
+		(!c_strlcasecmp(name, namelen, "BODY", 4)) ||
+		(!c_strlcasecmp(name, namelen, "FOOT", 4)))) {
+		if (c_span) new_columns(table, c_span, c_width, c_al, c_val, 1);
+
+		tags_add_table_bad_html_end(table, node);
+
+		group = 2;
+		goto see;
+	}
+
+	/* Beyond this point, only two letters tags. */
+	if (namelen != 1) goto see;
+
+	/* TR */
+	if (c == 'R') {
+		if (c_span) new_columns(table, c_span, c_width, c_al, c_val, 1);
+
+		if (in_cell) {
+			CELL(table, col, row)->end_node = node;
+			in_cell = 0;
+		}
+
+		tags_add_table_bad_html_end(table, node);
+
+		if (group) group--;
+		l_al = ALIGN_LEFT;
+		l_val = VALIGN_MIDDLE;
+		last_bgcolor = table->color.background;
+		tags_get_align(renderer, no, &l_al);
+		tags_get_valign(renderer, no, &l_val);
+		tags_get_bgcolor(renderer, no, &last_bgcolor);
+
+		/* get_attribute_value() yields "" for a missing attribute;
+		 * keep NULL in that case so "no id" stays distinguishable. */
+		id_value = node->get_attribute_value("id");
+		mem_free_set(&l_fragment_id,
+			     id_value.empty() ? (char *) NULL
+					      : memacpy(id_value.c_str(), id_value.size()));
+		row++;
+		col = 0;
+		goto see;
+	}
+
+	/* TD TH */
+	is_header = (c == 'H');
+
+	if (!is_header && c != 'D')
+		goto see;
+
+	if (c_span) new_columns(table, c_span, c_width, c_al, c_val, 1);
+
+	tags_add_table_bad_html_end(table, node);
+
+	if (in_cell) {
+		CELL(table, col, row)->end_node = node;
+		in_cell = 0;
+	}
+
+	if (row == -1) {
+		row = 0;
+		col = 0;
+	}
+
+	/* Find the first free slot in this row, skipping spanned cells. */
+	for (;;col++) {
+		cell = new_cell(table, col, row);
+		if (!cell) goto see;
+
+		if (!cell->is_used) break;
+		if (cell->colspan == -1) goto see;
+	}
+
+	in_cell = 1;
+
+	cell->col = col;
+	cell->row = row;
+	cell->is_used = 1;
+	/* The cell's content begins at this element (matches the end_node
+	 * bookkeeping done when the cell is closed). */
+	cell->start_node = node;
+
+	cell->align = l_al;
+	cell->valign = l_val;
+
+	id_value = node->get_attribute_value("id");
+	mem_free_set(&cell->fragment_id,
+		     id_value.empty() ? (char *) NULL
+				      : memacpy(id_value.c_str(), id_value.size()));
+
+	/* A cell without its own id inherits the id of its row (once). */
+	if (!cell->fragment_id && l_fragment_id) {
+		cell->fragment_id = l_fragment_id;
+		l_fragment_id = NULL;
+	}
+
+	cell->is_header = is_header;
+	if (cell->is_header) cell->align = ALIGN_CENTER;
+
+	if (group == 1) cell->is_group = 1;
+
+	if (col < table->columns_count) {
+		if (table->columns[col].align != ALIGN_TR)
+			cell->align = table->columns[col].align;
+		if (table->columns[col].valign != VALIGN_TR)
+			cell->valign = table->columns[col].valign;
+	}
+
+	cell->bgcolor = last_bgcolor;
+
+	tags_get_align(renderer, no, &cell->align);
+	tags_get_valign(renderer, no, &cell->valign);
+	tags_get_bgcolor(renderer, no, &cell->bgcolor);
+
+	colspan_value = node->get_attribute_value("colspan");
+	colspa = memacpy(colspan_value.c_str(), colspan_value.size());
+
+	/* -1 from get_num2() is mapped to a span of 1; an explicit 0 is
+	 * stored as -1 and resolved to "rest of the table" at scan_done. */
+	colspan = get_num2(colspa);
+	if (colspan == -1) colspan = 1;
+	else if (!colspan) colspan = -1;
+	else if (colspan > HTML_MAX_COLSPAN) colspan = HTML_MAX_COLSPAN;
+
+	rowspan_value = node->get_attribute_value("rowspan");
+	rowspa = memacpy(rowspan_value.c_str(), rowspan_value.size());
+
+	rowspan = get_num2(rowspa);
+	if (rowspan == -1) rowspan = 1;
+	else if (!rowspan) rowspan = -1;
+	else if (rowspan > HTML_MAX_ROWSPAN) rowspan = HTML_MAX_ROWSPAN;
+
+	cell->colspan = colspan;
+	cell->rowspan = rowspan;
+
+	if (colspan == 1) {
+		int width = WIDTH_AUTO;
+
+		tags_get_column_width(renderer, no, &width, sh);
+		if (width != WIDTH_AUTO)
+			set_td_width(table, col, width, 0);
+	}
+
+	/* Mark the cells covered horizontally; stop at the first cell that is
+	 * already taken and shrink the span accordingly. */
+	cols = table->cols;
+	for (i = 1; colspan != -1 ? i < colspan : i < cols; i++) {
+		struct table_cell *span_cell = new_cell(table, col + i, row);
+
+		if (!span_cell)
+			goto abort;
+
+		if (span_cell->is_used) {
+			colspan = i;
+			for (k = 0; k < i; k++)
+				CELL(table, col + k, row)->colspan = colspan;
+			break;
+		}
+
+		span_cell->is_used = span_cell->is_spanned = 1;
+		span_cell->rowspan = rowspan;
+		span_cell->colspan = colspan;
+		span_cell->col = col;
+		span_cell->row = row;
+	}
+
+	rows = table->rows;
+	maxj = rowspan != -1 ? rowspan : rows;
+	/* Out of memory prevention, limit allocated memory to HTML_MAX_CELLS_MEMORY.
+	 * Not perfect but better than nothing. */
+	if (maxj * i > HTML_MAX_CELLS_MEMORY / sizeof(*cell))
+		goto abort;
+
+	/* Same for the rows below; a collision truncates the rowspan. */
+	for (j = 1; j < maxj; j++) {
+		for (k = 0; k < i; k++) {
+			struct table_cell *span_cell = new_cell(table, col + k, row + j);
+
+			if (!span_cell)
+				goto abort;
+
+			if (span_cell->is_used) {
+				int l, m;
+
+				if (span_cell->col == col
+				    && span_cell->row == row)
+					continue;
+
+				/* Undo the cells already claimed in this row. */
+				for (l = 0; l < k; l++)
+					memset(CELL(table, col + l, row + j), 0,
+					       sizeof(*span_cell));
+
+				rowspan = j;
+
+				for (l = 0; l < i; l++)
+					for (m = 0; m < j; m++)
+						CELL(table, col + l, row + m)->rowspan = j;
+				goto see;
+			}
+
+			span_cell->is_used = span_cell->is_spanned = 1;
+			span_cell->rowspan = rowspan;
+			span_cell->colspan = colspan;
+			span_cell->col = col;
+			span_cell->row = row;
+		}
+	}
+
+	goto see;
+
+scan_done:
+	/* Clear the pointer as well: the abort path below frees it again. */
+	mem_free_set(&l_fragment_id, NULL);
+
+	/* Resolve "span to the end" markers now that the size is known. */
+	for (col = 0; col < table->cols; col++) for (row = 0; row < table->rows; row++) {
+		struct table_cell *cell = CELL(table, col, row);
+
+		if (!cell->is_spanned) {
+			if (cell->colspan == -1) cell->colspan = table->cols - col;
+			if (cell->rowspan == -1) cell->rowspan = table->rows - row;
+		}
+	}
+
+	if (table->rows) {
+		table->rows_heights = mem_calloc(table->rows, sizeof(*table->rows_heights));
+		if (!table->rows_heights)
+			goto abort;
+	} else {
+		table->rows_heights = NULL;
+	}
+
+	for (col = 0; col < table->columns_count; col++)
+		if (table->columns[col].width != WIDTH_AUTO)
+			set_td_width(table, col, table->columns[col].width, 1);
+	set_td_width(table, table->cols, WIDTH_AUTO, 0);
+
+	return table;
+
+abort:
+	/* A row id that was never handed over to a cell is still ours. */
+	mem_free_if(l_fragment_id);
+	free_table(table);
 	return NULL;
 }
@@ -277,7 +840,7 @@ tags_format_table(struct source_renderer *renderer, void *no)
html_context->table_level++;
- table = tags_parse_table(renderer, (part->document || part->box.x), no);
+ table = tags_parse_table(renderer, no, (part->document || part->box.x));
if (!table) goto ret0;
table->part = part;