Skip to content

Commit

Permalink
Implement CSS selectors
Browse files Browse the repository at this point in the history
  • Loading branch information
nielsdos committed Jun 29, 2024
1 parent 7defc23 commit 88da914
Show file tree
Hide file tree
Showing 50 changed files with 4,073 additions and 16 deletions.
1 change: 1 addition & 0 deletions UPGRADING.INTERNALS
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ PHP 8.4 INTERNALS UPGRADE NOTES
- Removed the "properties" HashTable field from php_libxml_node_object.
- Added a way to attach private data to a php_libxml_ref_obj.
- Added a way to fix a class type onto php_libxml_ref_obj.
- Added a way to record quirks mode in php_libxml_ref_obj.
- Added php_libxml_uses_internal_errors().
- Added a way to override document handlers (e.g. serialization) with
php_libxml_document_handlers.
Expand Down
12 changes: 10 additions & 2 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
ignore:
- "ext/dom/lexbor/lexbor" # bundled library
- "ext/pcre/pcre2lib" # bundled library
# bundled libraries
- "ext/dom/lexbor/lexbor/core"
- "ext/dom/lexbor/lexbor/css"
- "ext/dom/lexbor/lexbor/dom"
- "ext/dom/lexbor/lexbor/encoding"
- "ext/dom/lexbor/lexbor/html"
- "ext/dom/lexbor/lexbor/ns"
- "ext/dom/lexbor/lexbor/ports"
- "ext/dom/lexbor/lexbor/tag"
- "ext/pcre/pcre2lib"
12 changes: 9 additions & 3 deletions ext/dom/config.m4
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ if test "$PHP_DOM" != "no"; then
$LEXBOR_DIR/encoding/big5.c $LEXBOR_DIR/encoding/decode.c $LEXBOR_DIR/encoding/encode.c $LEXBOR_DIR/encoding/encoding.c $LEXBOR_DIR/encoding/euc_kr.c $LEXBOR_DIR/encoding/gb18030.c $LEXBOR_DIR/encoding/iso_2022_jp_katakana.c $LEXBOR_DIR/encoding/jis0208.c $LEXBOR_DIR/encoding/jis0212.c $LEXBOR_DIR/encoding/range.c $LEXBOR_DIR/encoding/res.c $LEXBOR_DIR/encoding/single.c \
$LEXBOR_DIR/html/encoding.c $LEXBOR_DIR/html/interface.c $LEXBOR_DIR/html/parser.c $LEXBOR_DIR/html/token.c $LEXBOR_DIR/html/token_attr.c $LEXBOR_DIR/html/tokenizer.c $LEXBOR_DIR/html/tree.c \
$LEXBOR_DIR/html/interfaces/anchor_element.c $LEXBOR_DIR/html/interfaces/area_element.c $LEXBOR_DIR/html/interfaces/audio_element.c $LEXBOR_DIR/html/interfaces/base_element.c $LEXBOR_DIR/html/interfaces/body_element.c $LEXBOR_DIR/html/interfaces/br_element.c $LEXBOR_DIR/html/interfaces/button_element.c $LEXBOR_DIR/html/interfaces/canvas_element.c $LEXBOR_DIR/html/interfaces/data_element.c $LEXBOR_DIR/html/interfaces/data_list_element.c $LEXBOR_DIR/html/interfaces/details_element.c $LEXBOR_DIR/html/interfaces/dialog_element.c $LEXBOR_DIR/html/interfaces/directory_element.c $LEXBOR_DIR/html/interfaces/div_element.c $LEXBOR_DIR/html/interfaces/d_list_element.c $LEXBOR_DIR/html/interfaces/document.c $LEXBOR_DIR/html/interfaces/element.c $LEXBOR_DIR/html/interfaces/embed_element.c $LEXBOR_DIR/html/interfaces/field_set_element.c $LEXBOR_DIR/html/interfaces/font_element.c $LEXBOR_DIR/html/interfaces/form_element.c $LEXBOR_DIR/html/interfaces/frame_element.c $LEXBOR_DIR/html/interfaces/frame_set_element.c $LEXBOR_DIR/html/interfaces/head_element.c $LEXBOR_DIR/html/interfaces/heading_element.c $LEXBOR_DIR/html/interfaces/hr_element.c $LEXBOR_DIR/html/interfaces/html_element.c $LEXBOR_DIR/html/interfaces/iframe_element.c $LEXBOR_DIR/html/interfaces/image_element.c $LEXBOR_DIR/html/interfaces/input_element.c $LEXBOR_DIR/html/interfaces/label_element.c $LEXBOR_DIR/html/interfaces/legend_element.c $LEXBOR_DIR/html/interfaces/li_element.c $LEXBOR_DIR/html/interfaces/link_element.c $LEXBOR_DIR/html/interfaces/map_element.c $LEXBOR_DIR/html/interfaces/marquee_element.c $LEXBOR_DIR/html/interfaces/media_element.c $LEXBOR_DIR/html/interfaces/menu_element.c $LEXBOR_DIR/html/interfaces/meta_element.c $LEXBOR_DIR/html/interfaces/meter_element.c $LEXBOR_DIR/html/interfaces/mod_element.c $LEXBOR_DIR/html/interfaces/object_element.c $LEXBOR_DIR/html/interfaces/o_list_element.c $LEXBOR_DIR/html/interfaces/opt_group_element.c $LEXBOR_DIR/html/interfaces/option_element.c 
$LEXBOR_DIR/html/interfaces/output_element.c $LEXBOR_DIR/html/interfaces/paragraph_element.c $LEXBOR_DIR/html/interfaces/param_element.c $LEXBOR_DIR/html/interfaces/picture_element.c $LEXBOR_DIR/html/interfaces/pre_element.c $LEXBOR_DIR/html/interfaces/progress_element.c $LEXBOR_DIR/html/interfaces/quote_element.c $LEXBOR_DIR/html/interfaces/script_element.c $LEXBOR_DIR/html/interfaces/select_element.c $LEXBOR_DIR/html/interfaces/slot_element.c $LEXBOR_DIR/html/interfaces/source_element.c $LEXBOR_DIR/html/interfaces/span_element.c $LEXBOR_DIR/html/interfaces/style_element.c $LEXBOR_DIR/html/interfaces/table_caption_element.c $LEXBOR_DIR/html/interfaces/table_cell_element.c $LEXBOR_DIR/html/interfaces/table_col_element.c $LEXBOR_DIR/html/interfaces/table_element.c $LEXBOR_DIR/html/interfaces/table_row_element.c $LEXBOR_DIR/html/interfaces/table_section_element.c $LEXBOR_DIR/html/interfaces/template_element.c $LEXBOR_DIR/html/interfaces/text_area_element.c $LEXBOR_DIR/html/interfaces/time_element.c $LEXBOR_DIR/html/interfaces/title_element.c $LEXBOR_DIR/html/interfaces/track_element.c $LEXBOR_DIR/html/interfaces/u_list_element.c $LEXBOR_DIR/html/interfaces/unknown_element.c $LEXBOR_DIR/html/interfaces/video_element.c $LEXBOR_DIR/html/interfaces/window.c \
$LEXBOR_DIR/selectors/selectors.c \
$LEXBOR_DIR/css/state.c $LEXBOR_DIR/css/log.c $LEXBOR_DIR/css/parser.c $LEXBOR_DIR/css/selectors/state.c $LEXBOR_DIR/css/selectors/selectors.c $LEXBOR_DIR/css/selectors/selector.c $LEXBOR_DIR/css/selectors/pseudo_state.c $LEXBOR_DIR/css/selectors/pseudo.c $LEXBOR_DIR/css/syntax/tokenizer/error.c $LEXBOR_DIR/css/syntax/state.c $LEXBOR_DIR/css/syntax/parser.c $LEXBOR_DIR/css/syntax/syntax.c $LEXBOR_DIR/css/syntax/anb.c $LEXBOR_DIR/css/syntax/tokenizer.c $LEXBOR_DIR/css/syntax/token.c $LEXBOR_DIR/css/css.c \
$LEXBOR_DIR/selectors-adapted/selectors.c \
$LEXBOR_DIR/ns/ns.c \
$LEXBOR_DIR/tag/tag.c"
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c infra.c \
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c parentnode.c \
domexception.c \
parentnode/tree.c parentnode/css_selectors.c \
processinginstruction.c cdatasection.c \
documentfragment.c domimplementation.c \
element.c node.c characterdata.c \
Expand All @@ -38,14 +40,18 @@ if test "$PHP_DOM" != "no"; then
namednodemap.c xpath_callbacks.c \
$LEXBOR_SOURCES],
$ext_shared,,$PHP_LEXBOR_CFLAGS)
PHP_ADD_BUILD_DIR($ext_builddir/parentnode)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/ports/posix/lexbor/core)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/core)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/dom/interfaces)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/html/tree/insertion_mode)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/html/tokenizer)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/html/interfaces)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/encoding)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/selectors)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/css/selectors)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/css/tokenizer)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/css/syntax/tokenizer)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/selectors-adapted)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/ns)
PHP_ADD_BUILD_DIR($ext_builddir/$LEXBOR_DIR/tag)
PHP_SUBST(DOM_SHARED_LIBADD)
Expand Down
9 changes: 7 additions & 2 deletions ext/dom/config.w32
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ if (PHP_DOM == "yes") {
) {
EXTENSION("dom", "php_dom.c attr.c document.c infra.c \
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c parentnode.c processinginstruction.c \
domexception.c processinginstruction.c \
cdatasection.c documentfragment.c domimplementation.c element.c \
node.c characterdata.c documenttype.c \
entity.c nodelist.c html_collection.c text.c comment.c \
entityreference.c \
notation.c xpath.c dom_iterators.c \
namednodemap.c xpath_callbacks.c", null, "-Iext/dom/lexbor");

ADD_SOURCES("ext/dom/parentnode", "tree.c css_selectors.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/ports/windows_nt/lexbor/core", "memory.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/core", "array_obj.c array.c avl.c bst.c diyfp.c conv.c dobject.c dtoa.c hash.c mem.c mraw.c print.c serialize.c shs.c str.c strtod.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/dom", "interface.c", "dom");
Expand All @@ -27,7 +28,11 @@ if (PHP_DOM == "yes") {
ADD_SOURCES("ext/dom/lexbor/lexbor/html", "encoding.c interface.c parser.c token.c token_attr.c tokenizer.c tree.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/encoding", "big5.c decode.c encode.c encoding.c euc_kr.c gb18030.c iso_2022_jp_katakana.c jis0208.c jis0212.c range.c res.c single.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/html/interfaces", "anchor_element.c area_element.c audio_element.c base_element.c body_element.c br_element.c button_element.c canvas_element.c data_element.c data_list_element.c details_element.c dialog_element.c directory_element.c div_element.c d_list_element.c document.c element.c embed_element.c field_set_element.c font_element.c form_element.c frame_element.c frame_set_element.c head_element.c heading_element.c hr_element.c html_element.c iframe_element.c image_element.c input_element.c label_element.c legend_element.c li_element.c link_element.c map_element.c marquee_element.c media_element.c menu_element.c meta_element.c meter_element.c mod_element.c object_element.c o_list_element.c opt_group_element.c option_element.c output_element.c paragraph_element.c param_element.c picture_element.c pre_element.c progress_element.c quote_element.c script_element.c select_element.c slot_element.c source_element.c span_element.c style_element.c table_caption_element.c table_cell_element.c table_col_element.c table_element.c table_row_element.c table_section_element.c template_element.c text_area_element.c time_element.c title_element.c track_element.c u_list_element.c unknown_element.c video_element.c window.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/selectors", "selectors.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/selectors-adapted", "selectors.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/css", "state.c log.c parser.c css.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/css/selectors", "state.c selectors.c selector.c pseudo_state.c pseudo.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/css/syntax", "state.c parser.c syntax.c anb.c tokenizer.c token.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/css/syntax/tokenizer", "error.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/ns", "ns.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/tag", "tag.c", "dom");
ADD_FLAG("CFLAGS_DOM", "/D LEXBOR_STATIC ");
Expand Down
3 changes: 3 additions & 0 deletions ext/dom/domexception.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,7 @@ typedef enum {
VALIDATION_ERR = 16
} dom_exception_code;

void php_dom_throw_error(dom_exception_code error_code, bool strict_error);
void php_dom_throw_error_with_message(dom_exception_code error_code, const char *error_message, bool strict_error);

#endif /* DOM_EXCEPTION_H */
62 changes: 62 additions & 0 deletions ext/dom/element.c
Original file line number Diff line number Diff line change
Expand Up @@ -1752,4 +1752,66 @@ PHP_METHOD(DOMElement, toggleAttribute)
}
/* }}} end DOMElement::toggleAttribute */

/*
 * Shared implementation behind Dom\Element::querySelector() and
 * Dom\Element::querySelectorAll().
 *
 * Takes exactly one string argument (the selector list), obtains the node and
 * dom_object for $this via DOM_GET_THIS_OBJ, and forwards them together with
 * return_value to the parent-node selector helper.
 *
 * `all` selects which helper is called:
 *   true  -> dom_parent_node_query_selector_all()
 *   false -> dom_parent_node_query_selector()
 */
static void php_dom_dispatch_query_selector(INTERNAL_FUNCTION_PARAMETERS, bool all)
{
	zend_string *selector_list;
	zval *this_zv;
	dom_object *this_obj;
	xmlNodePtr context_node;

	ZEND_PARSE_PARAMETERS_START(1, 1)
		Z_PARAM_STR(selector_list)
	ZEND_PARSE_PARAMETERS_END();

	/* Macro defined elsewhere; it fills the three locals from $this. */
	DOM_GET_THIS_OBJ(context_node, this_zv, xmlNodePtr, this_obj);

	if (!all) {
		dom_parent_node_query_selector(context_node, this_obj, return_value, selector_list);
		return;
	}

	dom_parent_node_query_selector_all(context_node, this_obj, return_value, selector_list);
}

PHP_METHOD(Dom_Element, querySelector)
{
php_dom_dispatch_query_selector(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}

PHP_METHOD(Dom_Element, querySelectorAll)
{
php_dom_dispatch_query_selector(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}

/*
 * Dom\Element::matches(string $selectors)
 *
 * Takes exactly one string argument, obtains the node and dom_object for
 * $this via DOM_GET_THIS_OBJ, and delegates to dom_element_matches(), which
 * is handed return_value.
 */
PHP_METHOD(Dom_Element, matches)
{
	zend_string *selector_list;
	zval *this_zv;
	dom_object *this_obj;
	xmlNodePtr element_node;

	ZEND_PARSE_PARAMETERS_START(1, 1)
		Z_PARAM_STR(selector_list)
	ZEND_PARSE_PARAMETERS_END();

	/* Macro defined elsewhere; it fills the three locals from $this. */
	DOM_GET_THIS_OBJ(element_node, this_zv, xmlNodePtr, this_obj);

	dom_element_matches(element_node, this_obj, return_value, selector_list);
}

/*
 * Dom\Element::closest(string $selectors)
 *
 * Takes exactly one string argument, obtains the node and dom_object for
 * $this via DOM_GET_THIS_OBJ, and delegates to dom_element_closest(), which
 * is handed return_value.
 */
PHP_METHOD(Dom_Element, closest)
{
	zend_string *selector_list;
	zval *this_zv;
	dom_object *this_obj;
	xmlNodePtr start_node;

	ZEND_PARSE_PARAMETERS_START(1, 1)
		Z_PARAM_STR(selector_list)
	ZEND_PARSE_PARAMETERS_END();

	/* Macro defined elsewhere; it fills the three locals from $this. */
	DOM_GET_THIS_OBJ(start_node, this_zv, xmlNodePtr, this_obj);

	dom_element_closest(start_node, this_obj, return_value, selector_list);
}

#endif
2 changes: 2 additions & 0 deletions ext/dom/html5_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "html5_parser.h"
#include <lexbor/html/parser.h>
#include <lexbor/html/interfaces/element.h>
#include <lexbor/dom/dom.h>
#include <libxml/parserInternals.h>
#include <libxml/HTMLtree.h>
#include <Zend/zend.h>
Expand Down Expand Up @@ -380,6 +381,7 @@ void lexbor_libxml2_bridge_copy_observations(lxb_html_tree_t *tree, lexbor_libxm
observations->has_explicit_html_tag = tree->has_explicit_html_tag;
observations->has_explicit_head_tag = tree->has_explicit_head_tag;
observations->has_explicit_body_tag = tree->has_explicit_body_tag;
observations->quirks_mode = lxb_dom_interface_document(tree->document)->compat_mode == LXB_DOM_DOCUMENT_CMODE_QUIRKS;
}

#endif /* HAVE_LIBXML && HAVE_DOM */
1 change: 1 addition & 0 deletions ext/dom/html5_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ typedef struct _lexbor_libxml2_bridge_extracted_observations {
bool has_explicit_html_tag;
bool has_explicit_head_tag;
bool has_explicit_body_tag;
bool quirks_mode;
} lexbor_libxml2_bridge_extracted_observations;

typedef struct _lexbor_libxml2_bridge_parse_context {
Expand Down
2 changes: 2 additions & 0 deletions ext/dom/html_document.c
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,7 @@ PHP_METHOD(Dom_HTMLDocument, createFromString)
NULL
);
dom_set_xml_class(intern->document);
intern->document->quirks_mode = ctx.observations.quirks_mode;
intern->document->private_data = php_dom_libxml_ns_mapper_header(ns_mapper);
return;

Expand Down Expand Up @@ -1137,6 +1138,7 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
NULL
);
dom_set_xml_class(intern->document);
intern->document->quirks_mode = ctx.observations.quirks_mode;
intern->document->private_data = php_dom_libxml_ns_mapper_header(ns_mapper);
return;

Expand Down
Loading

0 comments on commit 88da914

Please sign in to comment.