/** CSS main parser * @file */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include "elinks.h" #include "config/options.h" #include "document/css/css.h" #include "document/css/parser.h" #include "document/css/property.h" #include "document/css/scanner.h" #include "document/css/stylesheet.h" #include "document/css/value.h" #include "util/color.h" #include "util/lists.h" #include "util/error.h" #include "util/memory.h" #include "util/string.h" static void css_parse_ruleset(struct css_stylesheet *css, struct scanner *scanner); void css_parse_properties(LIST_OF(struct css_property) *props, struct scanner *scanner) { assert(props && scanner); while (scanner_has_tokens(scanner)) { struct css_property_info *property_info = NULL; struct css_property *prop; struct scanner_token *token = get_scanner_token(scanner); int i; if (!token || token->type == '}') break; /* Extract property name. */ if (token->type != CSS_TOKEN_IDENT || !check_next_scanner_token(scanner, ':')) { /* Some use style="{ properties }" so we have to be * check what to skip to. */ if (token->type == '{') { skip_scanner_token(scanner); } else { skip_css_tokens(scanner, ';'); } continue; } for (i = 0; css_property_info[i].name; i++) { struct css_property_info *info = &css_property_info[i]; if (scanner_token_strlcasecmp(token, info->name, -1)) { property_info = info; break; } } /* Skip property name and separator and check for expression */ if (!skip_css_tokens(scanner, ':')) { assert(!scanner_has_tokens(scanner)); break; } if (!property_info) { /* Unknown property, check the next one. */ goto ride_on; } /* We might be on track of something, cook up the struct. */ prop = (struct css_property *)mem_calloc(1, sizeof(*prop)); if (!prop) { goto ride_on; } prop->type = property_info->type; prop->value_type = property_info->value_type; if (!css_parse_value(property_info, &prop->value, scanner)) { mem_free(prop); goto ride_on; } add_to_list(*props, prop); /* Maybe we have something else to go yet? */ ride_on: skip_css_tokens(scanner, ';'); } } static void skip_css_block(struct scanner *scanner) { if (skip_css_tokens(scanner, '{')) { const int preclimit = get_css_precedence('}'); int depth = 1; struct scanner_token *token = get_scanner_token(scanner); while (token && token->precedence <= preclimit && depth > 0) { if (token->type == '{') ++depth; else if (token->type == '}') --depth; token = get_next_scanner_token(scanner); } } } /* Parse a list of media types. * * Media types grammar: * * @verbatim * media_types: * * | * | media_types ',' * @endverbatim * * This does not entirely match appendix D of CSS2: ELinks allows any * list of media types to be empty, whereas CSS2 allows that only in * @@import and not in @@media. * * @return nonzero if the directive containing this list should take * effect, zero if not. */ static int css_parse_media_types(struct scanner *scanner) { int matched = 0; int empty = 1; const char *const optstr = get_opt_str("document.css.media", NULL); struct scanner_token *token = get_scanner_token(scanner); while (token && token->type == CSS_TOKEN_IDENT) { empty = 0; if (!matched) /* Skip string ops if already matched. */ matched = supports_css_media_type( optstr, token->string, token->length); token = get_next_scanner_token(scanner); if (!token || token->type != ',') break; token = get_next_scanner_token(scanner); } return matched || empty; } /** Parse an atrule from @a scanner and update @a css accordingly. * * Atrules grammar: * * @verbatim * atrule: * '@charset' ';' * | '@import' media_types ';' * | '@import' media_types ';' * | '@media' media_types '{' ruleset* '}' * | '@page' ? [':' ]? '{' properties '}' * | '@font-face' '{' properties '}' * @endverbatim */ static void css_parse_atrule(struct css_stylesheet *css, struct scanner *scanner, struct uri *base_uri) { struct scanner_token *token = get_scanner_token(scanner); struct string import_uri; /* Skip skip skip that code */ switch (token->type) { case CSS_TOKEN_AT_IMPORT: token = get_next_scanner_token(scanner); if (!token) break; if (token->type != CSS_TOKEN_STRING && token->type != CSS_TOKEN_URL) goto skip_rest_of_atrule; /* As of 2007-07, token->string points into the * original CSS text, so the pointer will remain * valid even if we parse more tokens. But this * may have to change when backslash escapes are * properly supported. So play it safe and make * a copy of the string. */ if (!init_string(&import_uri)) goto skip_rest_of_atrule; if (!add_bytes_to_string(&import_uri, token->string, token->length)) { done_string(&import_uri); goto skip_rest_of_atrule; } skip_scanner_token(scanner); if (!css_parse_media_types(scanner)) { done_string(&import_uri); goto skip_rest_of_atrule; } token = get_scanner_token(scanner); if (!token || token->type != ';') { done_string(&import_uri); goto skip_rest_of_atrule; } skip_scanner_token(scanner); assert(css->import); css->import(css, base_uri, import_uri.source, import_uri.length); done_string(&import_uri); break; case CSS_TOKEN_AT_CHARSET: skip_css_tokens(scanner, ';'); break; case CSS_TOKEN_AT_MEDIA: skip_scanner_token(scanner); if (!css_parse_media_types(scanner)) goto skip_rest_of_atrule; token = get_scanner_token(scanner); if (!token || token->type != '{') goto skip_rest_of_atrule; token = get_next_scanner_token(scanner); while (token && token->type != '}') { css_parse_ruleset(css, scanner); token = get_scanner_token(scanner); } if (token) skip_scanner_token(scanner); break; case CSS_TOKEN_AT_FONT_FACE: case CSS_TOKEN_AT_PAGE: skip_css_block(scanner); break; skip_rest_of_atrule: case CSS_TOKEN_AT_KEYWORD: /* TODO: Unkown @-rule so either skip til ';' or next block. */ token = get_scanner_token(scanner); while (token) { if (token->type == ';') { skip_scanner_token(scanner); break; } else if (token->type == '{') { skip_css_block(scanner); break; } token = get_next_scanner_token(scanner); } break; default: INTERNAL("@-rule parser called without atrule."); } } struct selector_pkg { LIST_HEAD(struct selector_pkg); struct css_selector *selector; }; /** Move a CSS selector and its leaves into a new set. If a similar * selector already exists in the set, merge them. * * @param sels * The set to which @a selector should be moved. Must not be NULL. * @param selector * The selector that should be moved. Must not be NULL. If it is * already in some set, this function removes it from there. * @param watch * This function updates @a *watch if it merges that selector into * another one. @a watch must not be NULL but @a *watch may be. * * @returns @a selector or the one into which it was merged. */ static struct css_selector * reparent_selector(struct css_selector_set *sels, struct css_selector *selector, struct css_selector **watch) { struct css_selector *twin = find_css_selector(sels, selector->type, selector->relation, selector->name, -1); if (twin) { merge_css_selectors(twin, selector); /* Reparent leaves. */ while (!css_selector_set_empty(&selector->leaves)) { struct css_selector *leaf = css_selector_set_front(&selector->leaves); reparent_selector(&twin->leaves, leaf, watch); } if (*watch == selector) *watch = twin; done_css_selector(selector); } else { if (css_selector_is_in_set(selector)) del_css_selector_from_set(selector); add_css_selector_to_set(selector, sels); } return twin ? twin : selector; } /** Parse a comma-separated list of CSS selectors from @a scanner. * Register the selectors in @a css so that get_css_selector_for_element() * will find them, and add them to @a selectors so that the caller can * attach properties to them. * * Our selector grammar: * * @verbatim * selector: * element_name? ('#' id)? ('.' class)? (':' pseudo_class)? \ * ((' ' | '>') selector)? * @endverbatim */ static void css_parse_selector(struct css_stylesheet *css, struct scanner *scanner, LIST_OF(struct selector_pkg) *selectors) { /* Shell for the last selector (the whole selector chain, that is). */ struct selector_pkg *pkg = NULL; /* In 'p#x.y i.z', it's NULL for 'p', 'p' for '#x', '.y' and 'i', and * 'i' for '.z'. */ struct css_selector *prev_element_selector = NULL; /* In 'p#x.y:q i', it's NULL for 'p' and '#x', '#x' for '.y', and '.y' * for ':q', and again NULL for 'i'. */ struct css_selector *prev_specific_selector = NULL; /* In 'p#x.y div.z:a' it is NULL for 'p#x.y' and 'div', and 'p' for * '.z' and ':a'. So the difference from @prev_element_selector is that * it is changed after the current selector fragment is finished, not * right after the base selector is loaded. So it is set differently * for the '#x.y' and '.z:a' parts of selector. */ struct css_selector *last_chained_selector = NULL; /* In 'p#x.y div.z:a, i.b {}', it's set for ':a' and '.b'. */ int last_fragment = 0; /* In 'p#x .y', it's set for 'p' and '.y'. Note that it is always set in * the previous iteration so it's valid for the current token only * before "saving" the token. */ int selector_start = 1; /* FIXME: element can be even '*' --pasky */ while (scanner_has_tokens(scanner)) { struct scanner_token *token = get_scanner_token(scanner); struct scanner_token last_token; struct css_selector *selector; enum css_selector_relation reltype = CSR_ROOT; css_selector_type_T seltype = CST_ELEMENT; assert(token); assert(!last_fragment); if (token->type == '{' || token->type == '}' || token->type == ';') break; /* Examine the selector fragment */ if (token->type != CSS_TOKEN_IDENT) { switch (token->type) { case CSS_TOKEN_HASH: case CSS_TOKEN_HEX_COLOR: seltype = CST_ID; reltype = selector_start ? CSR_ANCESTOR : CSR_SPECIFITY; break; case '.': seltype = CST_CLASS; reltype = selector_start ? CSR_ANCESTOR : CSR_SPECIFITY; break; case ':': seltype = CST_PSEUDO; reltype = selector_start ? CSR_ANCESTOR : CSR_SPECIFITY; break; case '>': seltype = CST_ELEMENT; reltype = CSR_PARENT; break; default: /* FIXME: Temporary fix for this weird CSS * precedence thing. ')' has higher than ',' * and it can cause problems when skipping * here. The reason is for the function() * parsing. Hmm... --jonas */ if (!skip_css_tokens(scanner, ',')) skip_scanner_token(scanner); seltype = CST_INVALID; break; } if (seltype == CST_INVALID) continue; /* Hexcolor and hash already contains the ident * inside. */ if (token->type != CSS_TOKEN_HEX_COLOR && token->type != CSS_TOKEN_HASH) { token = get_next_scanner_token(scanner); if (!token) break; if (token->type != CSS_TOKEN_IDENT) /* wtf */ continue; } else { /* Skip the leading '#'. */ token->string++, token->length--; } } else { if (pkg) reltype = CSR_ANCESTOR; } /* Look ahead at what's coming next */ copy_struct(&last_token, token); /* Detect whether upcoming tokens are separated by * whitespace or not (that's important for determining * whether it's a combinator or specificitier). */ if (last_token.string + last_token.length < scanner->end) { selector_start = last_token.string[last_token.length]; selector_start = (selector_start != '#' && selector_start != '.' && selector_start != ':'); } /* else it doesn't matter as we are gonna bail out anyway. */ token = get_next_scanner_token(scanner); if (!token) break; last_fragment = (token->type == ',' || token->type == '{'); /* Register the selector */ if (!pkg) { selector = get_css_base_selector( last_fragment ? css : NULL, seltype, CSR_ROOT, last_token.string, last_token.length); if (!selector) continue; pkg = (struct selector_pkg *)mem_calloc(1, sizeof(*pkg)); if (!pkg) continue; add_to_list(*selectors, pkg); pkg->selector = selector; } else if (reltype == CSR_SPECIFITY) { /* We append under the last fragment. */ struct css_selector *base_sel = prev_specific_selector; if (!base_sel) base_sel = prev_element_selector; assert(base_sel); selector = get_css_selector(&base_sel->leaves, seltype, reltype, last_token.string, last_token.length); if (!selector) continue; if (last_chained_selector) { /* The situation is like: 'div p#x', now it was * 'p -> div', but we need to redo that as * '(p ->) #x -> div'. */ del_css_selector_from_set(last_chained_selector); add_css_selector_to_set(last_chained_selector, &selector->leaves); } if (pkg->selector == base_sel) { /* This is still just specificitying offspring * of the previous pkg->selector. */ pkg->selector = selector; } if (last_fragment) { /* This is the last fragment of the selector * chain, that means the last base fragment * wasn't marked so and thus wasn't bound to * the stylesheet. Let's do that now. */ assert(prev_element_selector); set_css_selector_relation(prev_element_selector, CSR_ROOT); prev_element_selector = reparent_selector(&css->selectors, prev_element_selector, &pkg->selector); } } else /* CSR_PARENT || CSR_ANCESTOR */ { /* We - in the perlish speak - unshift in front * of the previous selector fragment and reparent * it to the upcoming one. */ selector = get_css_base_selector( last_fragment ? css : NULL, seltype, CSR_ROOT, last_token.string, last_token.length); if (!selector) continue; assert(prev_element_selector); set_css_selector_relation(prev_element_selector, reltype); add_css_selector_to_set(prev_element_selector, &selector->leaves); last_chained_selector = prev_element_selector; } /* Record the selector fragment for future generations */ if (reltype == CSR_SPECIFITY) { prev_specific_selector = selector; } else { prev_element_selector = selector; prev_specific_selector = NULL; } /* What to do next */ if (last_fragment) { /* Next selector coming, clean up. */ pkg = NULL; last_fragment = 0; selector_start = 1; prev_element_selector = NULL; prev_specific_selector = NULL; last_chained_selector = NULL; } if (token->type == ',') { /* Another selector hooked to these properties. */ skip_scanner_token(scanner); } else if (token->type == '{') { /* End of selector list. */ break; } /* else Another selector fragment probably coming up. */ } /* Wipe the selector we were currently composing, if any. */ if (pkg) { if (prev_element_selector) done_css_selector(prev_element_selector); del_from_list(pkg); mem_free(pkg); } } /** Parse a ruleset from @a scanner to @a css. * * Ruleset grammar: * * @verbatim * ruleset: * selector [ ',' selector ]* '{' properties '}' * @endverbatim */ static void css_parse_ruleset(struct css_stylesheet *css, struct scanner *scanner) { INIT_LIST_OF(struct selector_pkg, selectors); INIT_LIST_OF(struct css_property, properties); struct selector_pkg *pkg; css_parse_selector(css, scanner, &selectors); if (list_empty(selectors) || !skip_css_tokens(scanner, '{')) { if (!list_empty(selectors)) free_list(selectors); skip_css_tokens(scanner, '}'); return; } /* We don't handle the case where a property has already been added to * a selector. That doesn't matter though, because the best one will be * always the last one (FIXME: 'important!'), therefore the applier * will take it last and it will have the "final" effect. * * So it's only a little waste and no real harm. The thing is, what do * you do when you have 'background: #fff' and then 'background: * x-repeat'? It would require yet another logic to handle merging of * these etc and the induced overhead would in most cases mean more * waste that having the property multiple times in a selector, I * believe. --pasky */ pkg = (struct selector_pkg *)selectors.next; css_parse_properties(&properties, scanner); skip_css_tokens(scanner, '}'); /* Mirror the properties to all the selectors. */ foreach (pkg, selectors) { #ifdef DEBUG_CSS /* Cannot use list_empty() inside the arglist of DBG() * because GCC 4.1 "warning: operation on `errfile' * may be undefined" breaks the build with -Werror. */ int dbg_has_properties = !list_empty(properties); int dbg_has_leaves = !css_selector_set_empty(&pkg->selector->leaves); DBG("Binding properties (!!%d) to selector %s (type %d, relation %d, children %d)", dbg_has_properties, pkg->selector->name, pkg->selector->type, pkg->selector->relation, dbg_has_leaves); #endif add_selector_properties(pkg->selector, &properties); } free_list(selectors); free_list(properties); } void css_parse_stylesheet(struct css_stylesheet *css, struct uri *base_uri, const char *string, const char *end) { struct scanner scanner; init_scanner(&scanner, &css_scanner_info, string, end); while (scanner_has_tokens(&scanner)) { struct scanner_token *token = get_scanner_token(&scanner); assert(token); switch (token->type) { case CSS_TOKEN_AT_KEYWORD: case CSS_TOKEN_AT_CHARSET: case CSS_TOKEN_AT_FONT_FACE: case CSS_TOKEN_AT_IMPORT: case CSS_TOKEN_AT_MEDIA: case CSS_TOKEN_AT_PAGE: css_parse_atrule(css, &scanner, base_uri); break; default: /* And WHAT ELSE could it be?! */ css_parse_ruleset(css, &scanner); } } #ifdef DEBUG_CSS dump_css_selector_tree(&css->selectors); WDBG("That's it."); #endif }