/* Plain text document renderer */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include "elinks.h" #include "bookmarks/bookmarks.h" #include "cache/cache.h" #include "config/options.h" #include "document/docdata.h" #include "document/document.h" #include "document/options.h" #include "document/plain/renderer.h" #include "document/renderer.h" #include "globhist/globhist.h" #include "intl/charsets.h" #include "protocol/protocol.h" #include "protocol/uri.h" #include "terminal/color.h" #include "terminal/draw.h" #include "util/color.h" #include "util/error.h" #include "util/memory.h" #include "util/string.h" struct plain_renderer { /* The document being renderered */ struct document *document; /* The data and data length of the defragmented cache entry */ unsigned char *source; int length; /* The convert table that should be used for converting line strings to * the rendered strings. */ struct conv_table *convert_table; /* The default template char data for text */ struct screen_char template; /* The maximum width any line can have (used for wrapping text) */ int max_width; /* The current line number */ int lineno; /* Are we doing line compression */ unsigned int compress:1; }; #define realloc_document_links(doc, size) \ ALIGN_LINK(&(doc)->links, (doc)->nlinks, size) static struct screen_char * realloc_line(struct document *document, int x, int y) { struct line *line = realloc_lines(document, y); if (!line) return NULL; if (x != line->length) { if (!ALIGN_LINE(&line->chars, line->length, x)) return NULL; line->length = x; } return line->chars; } static inline struct link * add_document_link(struct document *document, unsigned char *uri, int length, int x, int y) { struct link *link; struct point *point; if (!realloc_document_links(document, document->nlinks + 1)) return NULL; link = &document->links[document->nlinks]; if (!realloc_points(link, length)) return NULL; link->npoints = length; link->type = LINK_HYPERTEXT; link->where = uri; link->color.background = document->options.default_bg; link->color.foreground = document->options.default_link; link->number = document->nlinks; for (point = link->points; length > 0; length--, point++, x++) { point->x = x; point->y = y; } document->nlinks++; document->links_sorted = 0; return link; } /* Searches a word to find an email adress or an URI to add as a link. */ static inline struct link * check_link_word(struct document *document, unsigned char *uri, int length, int x, int y) { struct uri test; unsigned char *where = NULL; unsigned char *mailto = memchr(uri, '@', length); int keep = uri[length]; struct link *new_link; assert(document); if_assert_failed return NULL; uri[length] = 0; if (mailto && mailto > uri && mailto - uri < length - 1) { where = straconcat("mailto:", uri, NULL); } else if (parse_uri(&test, uri) == URI_ERRNO_OK && test.protocol != PROTOCOL_UNKNOWN && (test.datalen || test.hostlen)) { where = memacpy(uri, length); } uri[length] = keep; if (!where) return NULL; /* We need to reparse the URI and normalize it so that the protocol and * host part are converted to lowercase. */ normalize_uri(NULL, where); new_link = add_document_link(document, where, length, x, y); if (!new_link) mem_free(where); return new_link; } #define url_char(c) ( \ (c) > ' ' \ && (c) != '<' \ && (c) != '>' \ && (c) != '(' \ && (c) != ')' \ && !isquote(c)) static inline int get_uri_length(unsigned char *line, int length) { int uri_end = 0; while (uri_end < length && url_char(line[uri_end])) uri_end++; for (; uri_end > 0; uri_end--) { if (line[uri_end - 1] != '.' && line[uri_end - 1] != ',') break; } return uri_end; } static int print_document_link(struct plain_renderer *renderer, int lineno, unsigned char *line, int line_pos, int width, int expanded, struct screen_char *pos) { struct document *document = renderer->document; unsigned char *start = &line[line_pos]; int len = get_uri_length(start, width - line_pos); int screen_column = line_pos + expanded; struct link *new_link; int link_end = line_pos + len; unsigned char saved_char; struct document_options *doc_opts = &document->options; struct screen_char template = renderer->template; int i; if (!len) return 0; new_link = check_link_word(document, start, len, screen_column, lineno); if (!new_link) return 0; saved_char = line[link_end]; line[link_end] = '\0'; if (0) ; /* Shut up compiler */ #ifdef CONFIG_GLOBHIST else if (get_global_history_item(start)) new_link->color.foreground = doc_opts->default_vlink; #endif #ifdef CONFIG_BOOKMARKS else if (get_bookmark(start)) new_link->color.foreground = doc_opts->default_bookmark_link; #endif else new_link->color.foreground = doc_opts->default_link; line[link_end] = saved_char; new_link->color.background = doc_opts->default_bg; set_term_color(&template, &new_link->color, doc_opts->color_flags, doc_opts->color_mode); for (i = len; i; i--) { template.data = line[line_pos++]; copy_screen_chars(pos++, &template, 1); } return len; } static inline int add_document_line(struct plain_renderer *renderer, unsigned char *line, int line_width) { struct document *document = renderer->document; struct screen_char *template = &renderer->template; struct screen_char saved_renderer_template = *template; struct screen_char *pos, *startpos; #ifdef CONFIG_UTF_8 unsigned char *end, *text; int utf8 = document->options.utf8; #endif /* CONFIG_UTF_8 */ int lineno = renderer->lineno; int expanded = 0; int width = line_width; int line_pos; line = convert_string(renderer->convert_table, line, width, document->options.cp, CSM_NONE, &width, NULL, NULL); if (!line) return 0; /* Now expand tabs */ for (line_pos = 0; line_pos < width; line_pos++) { unsigned char line_char = line[line_pos]; if (line_char == ASCII_TAB && (line_pos + 1 == width || line[line_pos + 1] != ASCII_BS)) { int tab_width = 7 - ((line_pos + expanded) & 7); expanded += tab_width; } else if (line_char == ASCII_BS) { #if 0 This does not work: Suppose we have seventeen spaces followed by a back-space; that will call for sixteen bytes of memory, but we will print seventeen spaces before we hit the back-space -- overflow! /* Don't count the character * that the back-space character will delete */ if (expanded + line_pos) expanded--; #endif #if 0 /* Don't count the back-space character */ if (expanded > 0) expanded--; #endif } } assert(expanded >= 0); #ifdef CONFIG_UTF_8 if (utf8) goto utf_8; #endif /* CONFIG_UTF_8 */ startpos = pos = realloc_line(document, width + expanded, lineno); if (!pos) { mem_free(line); return 0; } expanded = 0; for (line_pos = 0; line_pos < width; line_pos++) { unsigned char line_char = line[line_pos]; unsigned char next_char, prev_char; prev_char = line_pos > 0 ? line[line_pos - 1] : '\0'; next_char = (line_pos + 1 < width) ? line[line_pos + 1] : '\0'; /* Do not expand tabs that precede back-spaces; this saves the * back-space code some trouble. */ if (line_char == ASCII_TAB && next_char != ASCII_BS) { int tab_width = 7 - ((line_pos + expanded) & 7); expanded += tab_width; template->data = ' '; do copy_screen_chars(pos++, template, 1); while (tab_width--); *template = saved_renderer_template; } else if (line_char == ASCII_BS) { if (!(expanded + line_pos)) { /* We've backspaced to the start of the line */ continue; } if (pos > startpos) pos--; /* Backspace */ /* Handle x^H_ as _^Hx, but prevent an infinite loop * swapping two underscores. */ if (next_char == '_' && prev_char != '_') { /* x^H_ becomes _^Hx */ if (line_pos - 1 >= 0) line[line_pos - 1] = next_char; if (line_pos + 1 < width) line[line_pos + 1] = prev_char; /* Go back and reparse the swapped characters */ if (line_pos - 2 >= 0) line_pos -= 2; continue; } if ((expanded + line_pos) - 2 >= 0) { /* Don't count the backspace character or the * deleted character when returning the line's * width or when expanding tabs. */ expanded -= 2; } if (pos->data == '_' && next_char == '_') { /* Is _^H_ an underlined underscore * or an emboldened underscore? */ if (expanded + line_pos >= 0 && pos - 1 >= startpos && (pos - 1)->attr) { /* There is some preceding text, * and it has an attribute; copy it */ template->attr |= (pos - 1)->attr; } else { /* Default to bold; seems more useful * than underlining the underscore */ template->attr |= SCREEN_ATTR_BOLD; } } else if (pos->data == '_') { /* Underline _^Hx */ template->attr |= SCREEN_ATTR_UNDERLINE; } else if (pos->data == next_char) { /* Embolden x^Hx */ template->attr |= SCREEN_ATTR_BOLD; } /* Handle _^Hx^Hx as both bold and underlined */ if (template->attr) template->attr |= pos->attr; } else { int added_chars = 0; if (document->options.plain_display_links && isalpha(line_char) && isalpha(next_char)) { /* We only want to check for a URI if there are * at least two consecutive alphabetic * characters, or if we are at the very start of * the line. It improves performance a bit. * --Zas */ added_chars = print_document_link(renderer, lineno, line, line_pos, width, expanded, pos); } if (added_chars) { line_pos += added_chars - 1; pos += added_chars; } else { if (!isscreensafe(line_char)) line_char = '.'; template->data = line_char; copy_screen_chars(pos++, template, 1); /* Detect copy of nul chars to screen, this * should not occur. --Zas */ assert(line_char); } *template = saved_renderer_template; } } #ifdef CONFIG_UTF_8 goto end; utf_8: end = line + width; startpos = pos = realloc_line(document, width + expanded, lineno); if (!pos) { mem_free(line); return 0; } expanded = 0; for (text = line; text < end; ) { unsigned char line_char = *text; unsigned char next_char, prev_char; line_pos = text - line; prev_char = text > line ? *(text - 1) : '\0'; next_char = (text + 1 < end) ? *(text + 1) : '\0'; /* Do not expand tabs that precede back-spaces; this saves the * back-space code some trouble. */ if (line_char == ASCII_TAB && next_char != ASCII_BS) { int tab_width = 7 - ((line_pos + expanded) & 7); expanded += tab_width; template->data = ' '; do copy_screen_chars(pos++, template, 1); while (tab_width--); *template = saved_renderer_template; text++; } else if (line_char == ASCII_BS) { if (!(expanded + line_pos)) { /* We've backspaced to the start of the line */ if (expanded > 0) expanded--; /* Don't count it */ continue; } if (pos > startpos) pos--; /* Backspace */ /* Handle x^H_ as _^Hx, but prevent an infinite loop * swapping two underscores. */ if (next_char == '_' && prev_char != '_') { /* x^H_ becomes _^Hx */ if (text - 1 >= line) *(text - 1) = next_char; if (text + 1 < end) *(text + 1) = prev_char; /* Go back and reparse the swapped characters */ if (text - 2 >= line) text -= 2; continue; } if (expanded - 2 >= 0) { /* Don't count the backspace character or the * deleted character when returning the line's * width or when expanding tabs. */ expanded -= 2; } if (pos->data == '_' && next_char == '_') { /* Is _^H_ an underlined underscore * or an emboldened underscore? */ if (expanded + line_pos >= 0 && pos - 1 >= startpos && (pos - 1)->attr) { /* There is some preceding text, * and it has an attribute; copy it */ template->attr |= (pos - 1)->attr; } else { /* Default to bold; seems more useful * than underlining the underscore */ template->attr |= SCREEN_ATTR_BOLD; } } else if (pos->data == '_') { /* Underline _^Hx */ template->attr |= SCREEN_ATTR_UNDERLINE; } else if (pos->data == next_char) { /* Embolden x^Hx */ template->attr |= SCREEN_ATTR_BOLD; } /* Handle _^Hx^Hx as both bold and underlined */ if (template->attr) template->attr |= pos->attr; text++; } else { int added_chars = 0; if (document->options.plain_display_links && isalpha(line_char) && isalpha(next_char)) { /* We only want to check for a URI if there are * at least two consecutive alphabetic * characters, or if we are at the very start of * the line. It improves performance a bit. * --Zas */ added_chars = print_document_link(renderer, lineno, line, line_pos, width, expanded, pos); } if (added_chars) { text += added_chars; pos += added_chars; } else { unicode_val_T data = utf_8_to_unicode(&text, end); if (data == UCS_NO_CHAR) text++; template->data = (uint16_t)data; copy_screen_chars(pos++, template, 1); /* Detect copy of nul chars to screen, this * should not occur. --Zas */ assert(line_char); } *template = saved_renderer_template; } } end: #endif /* CONFIG_UTF_8 */ mem_free(line); realloc_line(document, pos - startpos, lineno); return width + expanded; } static void init_template(struct screen_char *template, struct document_options *options) { color_T background = options->default_bg; color_T foreground = options->default_fg; struct color_pair colors = INIT_COLOR_PAIR(background, foreground); template->attr = 0; template->data = ' '; set_term_color(template, &colors, options->color_flags, options->color_mode); } static struct node * add_node(struct plain_renderer *renderer, int x, int width, int height) { struct node *node = mem_alloc(sizeof(*node)); if (node) { struct document *document = renderer->document; set_box(&node->box, x, renderer->lineno, width, height); int_lower_bound(&document->width, width); int_lower_bound(&document->height, height); add_to_list(document->nodes, node); } return node; } static void add_document_lines(struct plain_renderer *renderer) { unsigned char *source = renderer->source; int length = renderer->length; int was_empty_line = 0; int was_wrapped = 0; for (; length > 0; renderer->lineno++) { unsigned char *xsource; int width, added, only_spaces = 1, spaces = 0, was_spaces = 0; int last_space = 0; int tab_spaces = 0; int step = 0; int doc_width = int_min(renderer->max_width, length); /* End of line detection: We handle \r, \r\n and \n types. */ for (width = 0; width + tab_spaces < doc_width; width++) { if (source[width] == ASCII_CR) step++; if (source[width + step] == ASCII_LF) step++; if (step) break; if (isspace(source[width])) { last_space = width; if (only_spaces) spaces++; else was_spaces++; if (source[width] == '\t') tab_spaces += 7 - ((width + tab_spaces) % 8); } else { only_spaces = 0; was_spaces = 0; } } if (only_spaces && step) { if (was_wrapped || (renderer->compress && was_empty_line)) { /* Successive empty lines will appear as one. */ length -= step + spaces; source += step + spaces; renderer->lineno--; assert(renderer->lineno >= 0); continue; } was_empty_line = 1; /* No need to keep whitespaces on an empty line. */ source += spaces; length -= spaces; width -= spaces; } else { was_empty_line = 0; was_wrapped = !step; if (was_spaces && step) { /* Drop trailing whitespaces. */ width -= was_spaces; step += was_spaces; } if (!step && (width < length) && last_space) { width = last_space; step = 1; } } assert(width >= 0); /* We will touch the supplied source, so better replicate it. */ xsource = memacpy(source, width); if (!xsource) continue; added = add_document_line(renderer, xsource, width); mem_free(xsource); if (added) { /* Add (search) nodes on a line by line basis */ add_node(renderer, 0, added, 1); } /* Skip end of line chars too. */ width += step; length -= width; source += width; } assert(!length); } void render_plain_document(struct cache_entry *cached, struct document *document, struct string *buffer) { struct conv_table *convert_table; unsigned char *head = empty_string_or_(cached->head); struct plain_renderer renderer; convert_table = get_convert_table(head, document->options.cp, document->options.assume_cp, &document->cp, &document->cp_status, document->options.hard_assume); renderer.source = buffer->source; renderer.length = buffer->length; renderer.document = document; renderer.lineno = 0; renderer.convert_table = convert_table; renderer.compress = document->options.plain_compress_empty_lines; renderer.max_width = document->options.wrap ? document->options.box.width : INT_MAX; document->bgcolor = document->options.default_bg; document->width = 0; #ifdef CONFIG_UTF_8 document->options.utf8 = is_cp_special(document->options.cp); #endif /* CONFIG_UTF_8 */ /* Setup the style */ init_template(&renderer.template, &document->options); add_document_lines(&renderer); }