diff --git a/meson.build b/meson.build index 043f30ff..b82687b8 100644 --- a/meson.build +++ b/meson.build @@ -88,6 +88,7 @@ conf_data.set('CONFIG_LIBEVENT', get_option('libevent')) conf_data.set('CONFIG_X', get_option('x')) conf_data.set('CONFIG_LIBDOM', get_option('libdom')) conf_data.set('CONFIG_HTMLCXX', get_option('htmlcxx')) +conf_data.set('CONFIG_XML', get_option('xml')) #CONFIG_BOOKMARKS=true #CONFIG_XBEL_BOOKMARKS=true @@ -301,6 +302,14 @@ if conf_data.get('CONFIG_HTMLCXX') deps += htmlcxxdeps endif +if conf_data.get('CONFIG_XML') + xml2deps = dependency('libxml2') + deps += xml2deps + xmlplusplusdeps = dependency('libxml++-5.0') + deps += xmlplusplusdeps +endif + + if conf_data.get('CONFIG_SCRIPTING_PYTHON') python3deps = dependency('python3-embed') deps += python3deps diff --git a/meson_options.txt b/meson_options.txt index 47b45c98..a5f82b24 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -62,3 +62,4 @@ option('libevent', type: 'boolean', value: false, description: 'compile with lib option('x', type: 'boolean', value: false, description: 'use the X Window System') option('libdom', type: 'boolean', value: false, description: 'libdom') option('htmlcxx', type: 'boolean', value: false, description: 'htmlcxx') +option('xml', type: 'boolean', value: false, description: 'libxml++') diff --git a/src/ecmascript/spidermonkey/document.c b/src/ecmascript/spidermonkey/document.c index 153ae493..c637308c 100644 --- a/src/ecmascript/spidermonkey/document.c +++ b/src/ecmascript/spidermonkey/document.c @@ -49,8 +49,9 @@ #include "viewer/text/link.h" #include "viewer/text/vs.h" -#include -using namespace htmlcxx; +#include +#include +#include #include @@ -698,12 +699,14 @@ document_parse(struct document *document) add_bytes_to_string(&str, f->data, f->length); - HTML::ParserDom parser; - tree *dom = new tree; - *dom = parser.parseTree(str.source); + // Parse HTML and create a DOM tree + xmlDoc* doc = htmlReadDoc((xmlChar*)str.source, NULL, NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING); + // Encapsulate raw libxml document in a libxml++ wrapper + xmlNode* r = xmlDocGetRootElement(doc); + xmlpp::Element* root = new xmlpp::Element(r); done_string(&str); - return (void *)dom; + return (void *)root; } @@ -731,9 +734,7 @@ document_getElementById(JSContext *ctx, unsigned int argc, JS::Value *vp) return true; } - tree *dom = document->dom; - tree::iterator it = dom->begin(); - tree::iterator end = dom->end(); + xmlpp::Element* root = (xmlpp::Element *)document->dom; struct string idstr; @@ -741,21 +742,22 @@ document_getElementById(JSContext *ctx, unsigned int argc, JS::Value *vp) jshandle_value_to_char_string(&idstr, ctx, &args[0]); std::string id = idstr.source; - JSObject *elem = nullptr; - - for (; it != end; ++it) { - if (it->isTag()) { - it->parseAttributes(); - if (it->attribute("id").first && it->attribute("id").second == id) { - tree *node = new tree; - *node = *it; - elem = getElement(ctx, node); - break; - } - } - } + std::string xpath = "//*[@id=\""; + xpath += id; + xpath += "\"]"; done_string(&idstr); + + auto elements = root->find(xpath); + + if (elements.size() == 0) { + args.rval().setNull(); + return true; + } + + auto node = elements[0]; + JSObject *elem = getElement(ctx, node); + if (elem) { args.rval().setObject(*elem); } else { diff --git a/src/ecmascript/spidermonkey/element.c b/src/ecmascript/spidermonkey/element.c index 84f10b7c..1df984b0 100644 --- a/src/ecmascript/spidermonkey/element.c +++ b/src/ecmascript/spidermonkey/element.c @@ -46,12 +46,12 @@ #include "viewer/text/link.h" #include "viewer/text/vs.h" +#include + #include #include #include -using namespace htmlcxx; - static bool element_get_property_childElementCount(JSContext *ctx, unsigned int argc, JS::Value *vp); static bool element_get_property_className(JSContext *ctx, unsigned int argc, JS::Value *vp); static bool element_set_property_className(JSContext *ctx, unsigned int argc, JS::Value *vp); @@ -84,16 +84,16 @@ JSClass element_class = { }; JSPropertySpec element_props[] = { - JS_PSG("childElementCount", element_get_property_childElementCount, JSPROP_ENUMERATE), - JS_PSGS("className", element_get_property_className, element_set_property_className, JSPROP_ENUMERATE), - JS_PSGS("dir", element_get_property_dir, element_set_property_dir, JSPROP_ENUMERATE), +// JS_PSG("childElementCount", element_get_property_childElementCount, JSPROP_ENUMERATE), +// JS_PSGS("className", element_get_property_className, element_set_property_className, JSPROP_ENUMERATE), +// JS_PSGS("dir", element_get_property_dir, element_set_property_dir, JSPROP_ENUMERATE), JS_PSGS("id", element_get_property_id, element_set_property_id, JSPROP_ENUMERATE), - JS_PSGS("innerHTML", element_get_property_innerHtml, element_set_property_innerHtml, JSPROP_ENUMERATE), - JS_PSGS("lang", element_get_property_lang, element_set_property_lang, JSPROP_ENUMERATE), - JS_PSGS("outerHTML", element_get_property_outerHtml, element_set_property_outerHtml, JSPROP_ENUMERATE), - JS_PSG("tagName", element_get_property_tagName, JSPROP_ENUMERATE), - JS_PSGS("textContent", element_get_property_textContent, element_set_property_textContent, JSPROP_ENUMERATE), - JS_PSGS("title", element_get_property_title, element_set_property_title, JSPROP_ENUMERATE), +// JS_PSGS("innerHTML", element_get_property_innerHtml, element_set_property_innerHtml, JSPROP_ENUMERATE), +// JS_PSGS("lang", element_get_property_lang, element_set_property_lang, JSPROP_ENUMERATE), +// JS_PSGS("outerHTML", element_get_property_outerHtml, element_set_property_outerHtml, JSPROP_ENUMERATE), +// JS_PSG("tagName", element_get_property_tagName, JSPROP_ENUMERATE), +// JS_PSGS("textContent", element_get_property_textContent, element_set_property_textContent, JSPROP_ENUMERATE), +// JS_PSGS("title", element_get_property_title, element_set_property_title, JSPROP_ENUMERATE), JS_PS_END }; @@ -268,17 +268,14 @@ element_get_property_id(JSContext *ctx, unsigned int argc, JS::Value *vp) return false; } - tree *el = JS_GetPrivate(hobj); + xmlpp::Element *el = JS_GetPrivate(hobj); if (!el) { args.rval().setNull(); return true; } - tree::iterator it = el->begin(); - it->parseAttributes(); - std::string v = it->attribute("id").second; - + std::string v = el->get_attribute_value("id"); args.rval().setString(JS_NewStringCopyZ(ctx, v.c_str())); return true; @@ -877,8 +874,8 @@ static bool element_hasAttribute(JSContext *ctx, unsigned int argc, JS::Value *r static bool element_hasAttributes(JSContext *ctx, unsigned int argc, JS::Value *rval); const spidermonkeyFunctionSpec element_funcs[] = { - { "hasAttribute", element_hasAttribute, 1 }, - { "hasAttributes", element_hasAttributes, 0 }, +// { "hasAttribute", element_hasAttribute, 1 }, +// { "hasAttributes", element_hasAttributes, 0 }, { NULL } };