1
0
mirror of https://github.com/rkd77/elinks.git synced 2025-01-03 14:57:44 -05:00

[css2xpath] placeholders for Rules

This commit is contained in:
Witold Filipczyk 2021-10-02 19:47:23 +02:00
parent 928afe522c
commit 413a2cb74a

View File

@ -131,6 +131,154 @@ dollar_equal_rule_apply(std::string &selector)
return preg_replace_callback(pattern, dollar_equal_rule_callback, selector); return preg_replace_callback(pattern, dollar_equal_rule_callback, selector);
} }
class Translator;
/**
 * Base class for a single selector-rewriting step.
 *
 * Translator keeps its rules as Rule pointers and calls apply() through
 * them, so apply() has to be virtual for a derived rule's implementation
 * to be reached, and the destructor has to be virtual so that a derived
 * rule can be destroyed through a Rule pointer.
 */
class Rule
{
public:
	Rule()
	{
	}

	virtual ~Rule()
	{
	}

	/**
	 * Rewrite a selector.
	 *
	 * The base implementation is the identity transformation: it leaves
	 * its argument untouched and returns a copy of it.
	 *
	 * @param selector  text to rewrite
	 * @return the rewritten text
	 */
	virtual std::string apply(std::string &selector)
	{
		return selector;
	}
};
/**
 * Rule described by a regular-expression pattern and a replacement string.
 *
 * Placeholder: the two pointers are stored exactly as passed (the strings
 * are not copied) and nothing reads them yet; apply() is the one inherited
 * from Rule.
 */
class RegexRule : public Rule
{
public:
	/**
	 * @param pat   regular-expression pattern text
	 * @param repl  replacement text
	 */
	RegexRule(const char *pat, const char *repl)
		: pattern(pat),
		  replacement(repl)
	{
	}

private:
	const char *pattern;
	const char *replacement;
};
/**
 * Placeholder for the ":not" rule.
 *
 * The constructor receives the Translator that creates the rule but does
 * not use or store it yet; apply() is the one inherited from Rule.
 */
class NotRule : public Rule
{
public:
	/**
	 * explicit: a Translator pointer must not silently convert to a rule.
	 *
	 * @param t  translator creating this rule (currently unused)
	 */
	explicit NotRule(Translator *t)
	{
		(void)t; /* not needed until the rule is implemented */
	}
};
/**
 * Placeholder for the ":nth-child" rule.
 *
 * Adds nothing to Rule yet: no state, and apply() is inherited unchanged.
 */
class NthChildRule : public Rule
{
public:
	NthChildRule() = default;
};
/**
 * Placeholder for the "$=" attribute rule.
 *
 * Adds nothing to Rule yet: no state, and apply() is inherited unchanged.
 */
class DollarEqualRule : public Rule
{
public:
	DollarEqualRule() = default;
};
class Translator
{
public:
std::string translate(std::string & selector)
{
for (auto r : rules)
{
selector = r->apply(selector);
}
return selector == "/" ? "/" : ("//" + selector);
}
Translator()
{
}
private:
Rule *rules[33] = {
// prefix|name
new RegexRule("([a-zA-Z0-9\\_\\-\\*]+)\\|([a-zA-Z0-9\\_\\-\\*]+)", "$1:$2"),
// add @ for attribs
new RegexRule("\\[([^G\\]~\\$\\*\\^\\|\\!]+)(=[^\\]]+)?\\]", "[@$1$2]"),
// multiple queries
new RegexRule("\\s*,\\s*", "|"),
// , + ~ >
new RegexRule("\\s*([\\+~>])\\s*", "$1"),
//* ~ + >
new RegexRule("([a-zA-Z0-9\\_\\-\\*])~([a-zA-Z0-9\\_\\-\\*])", "$1/following-sibling::$2"),
new RegexRule("([a-zA-Z0-9\\_\\-\\*])\\+([a-zA-Z0-9\\_\\-\\*])", "$1/following-sibling::*[1]/self::$2"),
new RegexRule("([a-zA-Z0-9\\_\\-\\*])>([a-zA-Z0-9\\_\\-\\*])", "$1/$2"),
// all unescaped stuff escaped
new RegexRule("\\[([^=]+)=([^'|][^\\]]*)\\]", "[$1=\"$2\"]"),
// all descendant or self to //
new RegexRule("(^|[^a-zA-Z0-9\\_\\-\\*])([#\\.])([a-zA-Z0-9\\_\\-]+)", "$1*$2$3"),
new RegexRule("([\\>\\+\\|\\~\\,\\s])([a-zA-Z\\*]+)", "$1//$2"),
new RegexRule("\\s+\\/\\//", "//"),
// :first-child
new RegexRule("([a-zA-Z0-9\\_\\-\\*]+):first-child", "*[1]/self::$1"),
// :last-child
new RegexRule("([a-zA-Z0-9\\_\\-\\*]+)?:last-child", "$1[not(following-sibling::*)]"),
// :only-child
new RegexRule("([a-zA-Z0-9\\_\\-\\*]+):only-child", "*[last()=1]/self::$1"),
// :empty
new RegexRule("([a-zA-Z0-9\\_\\-\\*]+)?:empty", "$1[not(*) and not(normalize-space())]"),
// :not
new NotRule(this),
// :nth-child
new NthChildRule(),
// :contains(selectors)
new RegexRule(":contains\\(([^\\)]*)\\)", "[contains(string(.),\"$1\")]"),
// |= attrib
new RegexRule("\\[([a-zA-Z0-9\\_\\-]+)\\|=([^\\]]+)\\]", "[@$1=$2 or starts-with(@$1,concat($2,\"-\"))]"),
// *= attrib
new RegexRule("\\[([a-zA-Z0-9\\_\\-]+)\\*=([^\\]]+)\\]", "[contains(@$1,$2)]"),
// ~= attrib
new RegexRule("\\[([a-zA-Z0-9\\_\\-]+)~=([^\\]]+)\\]", "[contains(concat(\" \",normalize-space(@$1),\" \"),concat(\" \",$2,\" \"))]"),
// ^= attrib
new RegexRule("\\[([a-zA-Z0-9\\_\\-]+)\\^=([^\\]]+)\\]", "[starts-with(@$1,$2)]"),
// $= attrib
new DollarEqualRule(),
// != attrib
new RegexRule("\\[([a-zA-Z0-9\\_\\-]+)\\!=[\\\"']+?([^\\\"\\]]+)[\\\"']+?\\]", "[not(@$1) or @$1!=\"$2\"]"),
// ids
new RegexRule("#([a-zA-Z0-9\\_\\-]+)", "[@id=\"$1\"]"),
// classes
new RegexRule("\\.([a-zA-Z0-9_-]+)(?![^[]*])", "[contains(concat(\" \",normalize-space(@class),\" \"),\" $1 \")]"),
// normalize multiple filters
new RegexRule("\\]\\[([^\\]]+)", " and ($1)"),
// tag:pseudo selectors
new RegexRule("(:enabled)", "[not(@disabled)]"),
new RegexRule("(:checked)", "[@checked=\"checked\"]"),
new RegexRule(":(disabled)", "[@$1]"),
new RegexRule(":root", "/"),
// use * when tag was omitted
new RegexRule("^\[", "*["),
new RegexRule("\\|\\[", "|*[")
};
};
#if 0 #if 0