mirror of
https://github.com/zebrajr/ladybird.git
synced 2025-12-06 00:19:53 +01:00
LibWeb: Implement XPath functionality using libxml2
This commit is contained in:
parent
f04b866cb0
commit
0ea519c539
|
|
@ -1066,6 +1066,7 @@ set(SOURCES
|
|||
XHR/XMLHttpRequestUpload.cpp
|
||||
XLink/AttributeNames.cpp
|
||||
XML/XMLDocumentBuilder.cpp
|
||||
XPath/XPath.cpp
|
||||
XPath/XPathEvaluator.cpp
|
||||
XPath/XPathExpression.cpp
|
||||
XPath/XPathNSResolver.cpp
|
||||
|
|
@ -1116,7 +1117,9 @@ set(GENERATED_SOURCES
|
|||
|
||||
ladybird_lib(LibWeb web EXPLICIT_SYMBOL_EXPORT)

# libxml2 is linked into LibWeb for the XPath implementation (XPath/XPath.cpp).
find_package(LibXml2 REQUIRED)

# Single link line: the previous copy without LibXml2::LibXml2 was a leftover and is dropped.
target_link_libraries(LibWeb PRIVATE LibCore LibCompress LibCrypto LibJS LibHTTP LibGfx LibIPC LibRegex LibSyntax LibTextCodec LibUnicode LibMedia LibWasm LibXML LibIDL LibURL LibTLS LibRequests LibGC LibThreading skia ${ANGLE_TARGETS} SDL3::SDL3 LibXml2::LibXml2)

# FIXME: https://github.com/microsoft/vcpkg/issues/42324
target_include_directories(LibWeb PRIVATE ${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/include)
|
||||
|
|
|
|||
211
Libraries/LibWeb/XPath/XPath.cpp
Normal file
211
Libraries/LibWeb/XPath/XPath.cpp
Normal file
|
|
@ -0,0 +1,211 @@
|
|||
/*
|
||||
* Copyright (c) 2025, Johannes Gustafsson <johannesgu@outlook.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/Format.h>
|
||||
#include <LibWeb/DOM/Attr.h>
|
||||
#include <LibWeb/DOM/CDATASection.h>
|
||||
#include <LibWeb/DOM/Comment.h>
|
||||
#include <LibWeb/DOM/Document.h>
|
||||
#include <LibWeb/DOM/DocumentFragment.h>
|
||||
#include <LibWeb/DOM/Element.h>
|
||||
#include <LibWeb/DOM/NamedNodeMap.h>
|
||||
#include <LibWeb/DOM/Node.h>
|
||||
#include <LibWeb/DOM/NodeType.h>
|
||||
#include <LibWeb/DOM/ProcessingInstruction.h>
|
||||
#include <LibWeb/DOM/Text.h>
|
||||
#include <LibWeb/WebIDL/DOMException.h>
|
||||
|
||||
#include <libxml/parser.h>
|
||||
#include <libxml/tree.h>
|
||||
#include <libxml/valid.h>
|
||||
#include <libxml/xmlstring.h>
|
||||
#include <libxml/xpath.h>
|
||||
|
||||
#include "XPath.h"
|
||||
|
||||
namespace Web::XPath {
|
||||
|
||||
// Recursively builds a libxml2 copy of the DOM subtree rooted at `node`, creating the nodes for `doc`.
//
// Every mirrored libxml2 node (and attribute) stores a back-pointer to the DOM node it was created
// from in `_private`, so that XPath results can later be mapped back to DOM nodes.
//
// Returns nullptr for node types that are not mirrored, for a document that has no document element,
// and when libxml2 fails to create the node. The returned node is not yet linked into `doc`.
static xmlNodePtr mirror_node(xmlDocPtr doc, DOM::Node const& node)
{
    // All back-pointers are stored as DOM::Node pointers, because that is the type they are read back as.
    auto as_private = [](DOM::Node const& dom_node) {
        return bit_cast<void*>(&dom_node);
    };

    // Mirrors every child of `dom_parent` and links it below `xml_parent`.
    // Children that cannot be mirrored are skipped.
    auto mirror_children = [&](xmlNodePtr xml_parent, auto const& dom_parent) {
        auto children = dom_parent.children_as_vector();
        for (auto& child : children) {
            auto* xml_child = mirror_node(doc, *child);
            if (!xml_child)
                continue;
            // NOTE(review): xmlAddChild() is documented to merge a text node into an adjacent text
            // sibling and free the added node. Confirm whether un-normalized DOM trees (two adjacent
            // Text nodes) need special handling here, since the second node's back-pointer would be lost.
            xmlAddChild(xml_parent, xml_child);
        }
    };

    switch (node.type()) {
    case DOM::NodeType::INVALID: {
        return nullptr;
    }
    case DOM::NodeType::ELEMENT_NODE: {
        auto const& element = static_cast<DOM::Element const&>(node);
        ByteString name = element.local_name().bytes_as_string_view();
        auto* xml_element = xmlNewDocNode(doc, nullptr, bit_cast<xmlChar const*>(name.characters()), nullptr);
        if (!xml_element)
            return nullptr;
        xml_element->_private = as_private(node);

        for (size_t i = 0; i < element.attribute_list_size(); ++i) {
            auto attribute_item = element.attributes()->item(i);
            if (!attribute_item)
                continue;
            auto const& attribute = *attribute_item;
            ByteString attr_name = attribute.name().bytes_as_string_view();
            ByteString attr_value = attribute.value().bytes_as_string_view();
            auto* attr = xmlSetProp(xml_element, bit_cast<xmlChar const*>(attr_name.characters()), bit_cast<xmlChar const*>(attr_value.characters()));
            // xmlSetProp() returns null on failure; skip the attribute instead of dereferencing null.
            if (!attr)
                continue;
            attr->_private = as_private(attribute);

            // Register "id" attributes so that the XPath id() function can find the element.
            if (attribute.name() == "id") {
                xmlAddIDSafe(attr, bit_cast<xmlChar const*>(attr_value.characters()));
            }
        }

        mirror_children(xml_element, element);
        return xml_element;
    }
    case DOM::NodeType::ATTRIBUTE_NODE: {
        return nullptr; // Attributes are handled together with their owning element above. If we get here, the attribute is the top node and therefore invalid.
    }
    case DOM::NodeType::TEXT_NODE: {
        auto const& text = static_cast<DOM::Text const&>(node);
        ByteString content = text.data().to_byte_string();
        auto* xml_text = xmlNewDocText(doc, bit_cast<xmlChar const*>(content.characters()));
        if (!xml_text)
            return nullptr;
        xml_text->_private = as_private(node);
        return xml_text;
    }
    case DOM::NodeType::CDATA_SECTION_NODE: {
        auto const& cdata = static_cast<DOM::CDATASection const&>(node);
        ByteString data = cdata.data().to_byte_string();
        auto* xml_cdata = xmlNewCDataBlock(doc, bit_cast<xmlChar const*>(data.characters()), data.length());
        if (!xml_cdata)
            return nullptr;
        xml_cdata->_private = as_private(node);
        return xml_cdata;
    }
    case DOM::NodeType::ENTITY_REFERENCE_NODE: // Does not seem to be used at all in ladybird
    case DOM::NodeType::ENTITY_NODE:           // Entity nodes are unused in libxml2
    {
        return nullptr;
    }
    case DOM::NodeType::PROCESSING_INSTRUCTION_NODE: {
        auto const& processing_instruction = static_cast<DOM::ProcessingInstruction const&>(node);
        ByteString target = processing_instruction.target().to_byte_string();
        ByteString data = processing_instruction.data().to_byte_string();
        auto* xml_pi = xmlNewDocPI(doc, bit_cast<xmlChar const*>(target.characters()), bit_cast<xmlChar const*>(data.characters()));
        if (!xml_pi)
            return nullptr;
        xml_pi->_private = as_private(node);
        return xml_pi;
    }
    case DOM::NodeType::COMMENT_NODE: {
        auto const& comment = static_cast<DOM::Comment const&>(node);
        ByteString content = comment.data().to_byte_string();
        auto* xml_comment = xmlNewDocComment(doc, bit_cast<xmlChar const*>(content.characters()));
        if (!xml_comment)
            return nullptr;
        xml_comment->_private = as_private(node);
        return xml_comment;
    }
    case DOM::NodeType::DOCUMENT_NODE: {
        // The document itself is represented by `doc`; mirror its document element, if there is one.
        auto const& document = static_cast<DOM::Document const&>(node);
        auto root = document.document_element();
        if (!root)
            return nullptr;
        return mirror_node(doc, *root);
    }
    case DOM::NodeType::DOCUMENT_TYPE_NODE: {
        return nullptr; // Unused in libxml2
    }
    case DOM::NodeType::DOCUMENT_FRAGMENT_NODE: {
        auto const& fragment = static_cast<DOM::DocumentFragment const&>(node);
        auto* xml_fragment = xmlNewDocFragment(doc);
        if (!xml_fragment)
            return nullptr;
        xml_fragment->_private = as_private(node);
        mirror_children(xml_fragment, fragment);
        return xml_fragment;
    }
    case DOM::NodeType::NOTATION_NODE: {
        return nullptr; // Unused in libxml2
    }
    }

    return nullptr;
}
|
||||
|
||||
// Copies the value of a libxml2 XPath result object into `result`.
//
// - Node sets are translated back to DOM nodes through the `_private` back-pointer stored on each
//   libxml2 node; `type` is forwarded to XPathResult::set_node_set() together with the nodes.
// - Booleans, numbers and strings are copied as-is.
// - All other libxml2 result kinds leave `result` untouched.
//
// Does nothing if either `xpath_result` or `result` is null.
static void convert_xpath_result(xmlXPathObjectPtr xpath_result, XPath::XPathResult* result, unsigned short type)
{
    if (!xpath_result || !result)
        return;

    switch (xpath_result->type) {
    case XPATH_UNDEFINED:
        break;
    case XPATH_NODESET: {
        Vector<GC::Ptr<DOM::Node>> node_list;

        auto const* node_set = xpath_result->nodesetval;
        if (node_set && node_set->nodeTab && node_set->nodeNr > 0) {
            node_list.ensure_capacity(node_set->nodeNr);
            for (int i = 0; i < node_set->nodeNr; ++i) {
                auto* node = node_set->nodeTab[i];
                // Namespace-axis results are xmlNs objects disguised as nodes; they have no DOM
                // counterpart and their layout differs from xmlNode, so `_private` must not be read.
                if (!node || node->type == XML_NAMESPACE_DECL)
                    continue;

                // Nodes without a back-pointer have no DOM node to report.
                auto* dom_node = static_cast<DOM::Node*>(node->_private);
                if (!dom_node)
                    continue;

                node_list.unchecked_append(dom_node);
            }
        }

        result->set_node_set(move(node_list), type);
        break;
    }
    case XPATH_BOOLEAN: {
        result->set_boolean(xpath_result->boolval);
        break;
    }
    case XPATH_NUMBER: {
        result->set_number(xpath_result->floatval);
        break;
    }
    case XPATH_STRING: {
        ReadonlyBytes bytes(xpath_result->stringval, xmlStrlen(xpath_result->stringval));
        result->set_string(String::from_utf8_without_validation(bytes));
        break;
    }
    case XPATH_USERS:
    case XPATH_XSLT_TREE: /* An XSLT value tree, non modifiable */
        break;
    }
}
|
||||
|
||||
// Allocates a new XPathExpression in `realm` that remembers `expression` and `resolver`.
// The expression text is not parsed here.
WebIDL::ExceptionOr<GC::Ref<XPathExpression>> create_expression(JS::Realm& realm, String const& expression, GC::Ptr<XPathNSResolver> resolver)
{
    auto xpath_expression = realm.create<XPathExpression>(realm, expression, resolver);
    return xpath_expression;
}
|
||||
|
||||
// Evaluates the XPath `expression` against `context_node` using libxml2.
//
// The DOM subtree rooted at `context_node` is mirrored into a temporary libxml2 document, the
// expression is compiled and evaluated against that mirror, and the outcome is written into
// `result` (a new XPathResult is created when `result` is null).
//
// `type` is forwarded to the result conversion for node-set results. The namespace resolver is
// currently ignored.
//
// Returns a SyntaxError if the expression does not compile, and an OperationError if the context
// node cannot be mirrored or libxml2 fails to set up or run the evaluation.
WebIDL::ExceptionOr<GC::Ref<XPathResult>> evaluate(JS::Realm& realm, String const& expression, DOM::Node const& context_node, GC::Ptr<XPathNSResolver> /*resolver*/, unsigned short type, GC::Ptr<XPathResult> result)
{
    // Parse the expression as xpath
    ByteString bytes = expression.bytes_as_string_view();
    auto* xpath_compiled = xmlXPathCompile(bit_cast<xmlChar const*>(bytes.characters()));
    if (!xpath_compiled)
        return WebIDL::SyntaxError::create(realm, "Invalid XPath expression"_utf16);
    ScopeGuard xpath_compiled_cleanup = [&] { xmlXPathFreeCompExpr(xpath_compiled); };

    auto* xml_document = xmlNewDoc(nullptr);
    if (!xml_document)
        return WebIDL::OperationError::create(realm, "XPath evaluation failed"_utf16);
    ScopeGuard xml_cleanup = [&] { xmlFreeDoc(xml_document); };

    // The libxml2 document always points back at the DOM document, stored as a DOM::Node pointer
    // because that is the type the back-pointers are read back as.
    DOM::Node const* owning_document = &context_node;
    if (context_node.type() != DOM::NodeType::DOCUMENT_NODE)
        owning_document = &context_node.document();
    xml_document->_private = bit_cast<void*>(owning_document);

    auto* xml_node = mirror_node(xml_document, context_node);
    if (!xml_node) {
        return WebIDL::OperationError::create(realm, "XPath evaluation failed"_utf16);
    }

    // From here on the mirrored tree is owned (and eventually freed) by xml_document.
    xmlDocSetRootElement(xml_document, xml_node);

    auto* xpath_context = xmlXPathNewContext(xml_document);
    if (!xpath_context)
        return WebIDL::OperationError::create(realm, "XPath evaluation failed"_utf16);
    ScopeGuard xpath_context_cleanup = [&] { xmlXPathFreeContext(xpath_context); };
    xmlXPathSetContextNode(xml_node, xpath_context);

    // A null result means the evaluation itself failed; report that instead of handing back a
    // result object that was never filled in.
    auto* xpath_result = xmlXPathCompiledEval(xpath_compiled, xpath_context);
    if (!xpath_result)
        return WebIDL::OperationError::create(realm, "XPath evaluation failed"_utf16);
    ScopeGuard xpath_result_cleanup = [&] { xmlXPathFreeObject(xpath_result); };

    if (!result) {
        result = realm.create<XPathResult>(realm);
    }

    convert_xpath_result(xpath_result, result, type);

    return GC::Ref<XPathResult>(*result);
}
|
||||
|
||||
}
|
||||
21
Libraries/LibWeb/XPath/XPath.h
Normal file
21
Libraries/LibWeb/XPath/XPath.h
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
/*
|
||||
* Copyright (c) 2025, Johannes Gustafsson <johannesgu@outlook.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <LibGC/Ptr.h>
|
||||
#include <LibWeb/WebIDL/ExceptionOr.h>
|
||||
|
||||
#include "XPathExpression.h"
|
||||
#include "XPathNSResolver.h"
|
||||
#include "XPathResult.h"
|
||||
|
||||
namespace Web::XPath {
|
||||
|
||||
// Creates an XPathExpression object in `realm` for the given expression text and namespace resolver.
WebIDL::ExceptionOr<GC::Ref<XPathExpression>> create_expression(JS::Realm& realm, String const& expression, GC::Ptr<XPathNSResolver> resolver);
|
||||
// Evaluates `expression` with `context_node` as the context node and returns the outcome as an
// XPathResult. When `result` is non-null that object is filled in and returned; otherwise a new
// XPathResult is created in `realm`. `type` is the requested result type for node-set results.
WebIDL::ExceptionOr<GC::Ref<XPathResult>> evaluate(JS::Realm& realm, String const& expression, DOM::Node const& context_node, GC::Ptr<XPathNSResolver> resolver, unsigned short type, GC::Ptr<XPathResult> result);
|
||||
|
||||
}
|
||||
|
|
@ -10,6 +10,7 @@
|
|||
#include <LibWeb/Bindings/XPathEvaluatorPrototype.h>
|
||||
#include <LibWeb/WebIDL/ExceptionOr.h>
|
||||
|
||||
#include "XPath.h"
|
||||
#include "XPathEvaluator.h"
|
||||
#include "XPathExpression.h"
|
||||
#include "XPathResult.h"
|
||||
|
|
@ -39,13 +40,13 @@ void XPathEvaluator::initialize(JS::Realm& realm)
|
|||
// Creates an XPathExpression for `expression` by delegating to the shared XPath::create_expression() helper.
WebIDL::ExceptionOr<GC::Ref<XPathExpression>> XPathEvaluator::create_expression(String const& expression, GC::Ptr<XPathNSResolver> resolver)
{
    auto& realm = this->realm();
    return XPath::create_expression(realm, expression, resolver);
}
|
||||
|
||||
// Evaluates `expression` against `context_node` by delegating to the shared XPath::evaluate() helper.
// `result`, when non-null, is the XPathResult object to reuse for the outcome.
WebIDL::ExceptionOr<GC::Ref<XPathResult>> XPathEvaluator::evaluate(String const& expression, DOM::Node const& context_node, GC::Ptr<XPathNSResolver> resolver, WebIDL::UnsignedShort type, GC::Ptr<XPathResult> result)
{
    auto& realm = this->realm();
    return XPath::evaluate(realm, expression, context_node, resolver, type, result);
}
|
||||
|
||||
GC::Ref<DOM::Node> XPathEvaluator::create_ns_resolver(GC::Ref<DOM::Node> node_resolver)
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#include <LibWeb/DOM/Node.h>
|
||||
#include <LibWeb/Forward.h>
|
||||
|
||||
#include "XPath.h"
|
||||
#include "XPathEvaluator.h"
|
||||
#include "XPathExpression.h"
|
||||
#include "XPathResult.h"
|
||||
|
|
@ -40,10 +41,10 @@ void XPathExpression::visit_edges(Cell::Visitor& visitor)
|
|||
|
||||
XPathExpression::~XPathExpression() = default;
|
||||
|
||||
// Evaluates this object's stored expression (m_expression, with m_resolver) against `context_node`
// by delegating to the shared XPath::evaluate() helper.
// `result`, when non-null, is the XPathResult object to reuse for the outcome.
WebIDL::ExceptionOr<GC::Ref<XPathResult>> XPathExpression::evaluate(DOM::Node const& context_node, WebIDL::UnsignedShort type, GC::Ptr<XPathResult> result)
{
    auto& realm = this->realm();
    return XPath::evaluate(realm, m_expression, context_node, m_resolver, type, result);
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,6 +36,34 @@ void XPathResult::visit_edges(Cell::Visitor& visitor)
|
|||
|
||||
XPathResult::~XPathResult() = default;
|
||||
|
||||
// Stores a numeric value in this result and marks it as NUMBER_TYPE.
void XPathResult::set_number(WebIDL::Double number_value)
{
    m_number_value = number_value;
    m_result_type = NUMBER_TYPE;
}
|
||||
// Takes ownership of a string value and marks this result as STRING_TYPE.
void XPathResult::set_string(String string_value)
{
    m_string_value = move(string_value);
    m_result_type = STRING_TYPE;
}
|
||||
|
||||
void XPathResult::set_boolean(bool boolean_value)
|
||||
{
|
||||
m_result_type = BOOLEAN_TYPE;
|
||||
m_boolean_value = boolean_value;
|
||||
}
|
||||
|
||||
// Stores a node-set value in this result and rewinds the iteration cursor to its first node.
// `type` is kept as the result type only when it lies between UNORDERED_NODE_ITERATOR_TYPE and
// FIRST_ORDERED_NODE_TYPE (inclusive).
void XPathResult::set_node_set(Vector<GC::Ptr<DOM::Node>> node_set, unsigned short type)
{
    if (type < XPathResult::UNORDERED_NODE_ITERATOR_TYPE || type > XPathResult::FIRST_ORDERED_NODE_TYPE)
        m_result_type = UNORDERED_NODE_ITERATOR_TYPE; // Default if the caller does not explicitly ask for anything else
    else
        m_result_type = type;

    m_node_set = move(node_set);
    // The iterator must be taken from the member after the move so that it refers to the stored vector.
    m_node_set_iter = m_node_set.begin();
}
|
||||
|
||||
GC::Ptr<DOM::Node> XPathResult::iterate_next()
|
||||
{
|
||||
if (m_node_set_iter == m_node_set.end())
|
||||
|
|
|
|||
|
|
@ -46,6 +46,11 @@ public:
|
|||
GC::Ptr<DOM::Node> iterate_next();
|
||||
GC::Ptr<DOM::Node> snapshot_item(int index);
|
||||
|
||||
void set_number(WebIDL::Double number_value);
|
||||
void set_string(String string_value);
|
||||
void set_boolean(bool boolean_value);
|
||||
void set_node_set(Vector<GC::Ptr<DOM::Node>> node_set, unsigned short type);
|
||||
|
||||
private:
|
||||
WebIDL::UnsignedShort m_result_type;
|
||||
WebIDL::Double m_number_value;
|
||||
|
|
|
|||
|
|
@ -102,6 +102,7 @@
|
|||
"name": "mman",
|
||||
"platform": "windows"
|
||||
},
|
||||
"libxml2",
|
||||
"openssl",
|
||||
{
|
||||
"name": "qtbase",
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user