From 086ea4cd7f449579d8f0a5d5fcafaedb060d2b7f Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 19 Jan 2025 15:00:24 +0100 Subject: [PATCH] Add simplexml_load_stream() --- ext/simplexml/simplexml.c | 107 ++++++++++++------ ext/simplexml/simplexml.stub.php | 3 + ext/simplexml/simplexml_arginfo.h | 14 ++- .../tests/simplexml_load_stream_broken.phpt | 51 +++++++++ .../tests/simplexml_load_stream_errors.phpt | 18 +++ .../tests/simplexml_load_stream_memory.phpt | 35 ++++++ ...exml_load_stream_memory_with_encoding.phpt | 27 +++++ 7 files changed, 219 insertions(+), 36 deletions(-) create mode 100644 ext/simplexml/tests/simplexml_load_stream_broken.phpt create mode 100644 ext/simplexml/tests/simplexml_load_stream_errors.phpt create mode 100644 ext/simplexml/tests/simplexml_load_stream_memory.phpt create mode 100644 ext/simplexml/tests/simplexml_load_stream_memory_with_encoding.phpt diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c index 3dcf7ca8fb049..33feb91d55058 100644 --- a/ext/simplexml/simplexml.c +++ b/ext/simplexml/simplexml.c @@ -2183,17 +2183,37 @@ sxe_object_new(zend_class_entry *ce) } /* }}} */ +static void sxe_create_obj_from_doc(zval *return_value, xmlDocPtr docp, zend_class_entry *ce, zend_string *ns, bool isprefix) +{ + if (!docp) { + RETURN_FALSE; + } + + zend_function *fptr_count; + if (!ce) { + ce = ce_SimpleXMLElement; + fptr_count = NULL; + } else { + fptr_count = php_sxe_find_fptr_count(ce); + } + php_sxe_object *sxe = php_sxe_object_new(ce, fptr_count); + sxe->iter.nsprefix = ZSTR_LEN(ns) ? 
zend_string_copy(ns) : NULL; + sxe->iter.isprefix = isprefix; + php_libxml_increment_doc_ref((php_libxml_node_object *)sxe, docp); + php_libxml_increment_node_ptr((php_libxml_node_object *)sxe, xmlDocGetRootElement(docp), NULL); + + RETURN_OBJ(&sxe->zo); +} + /* {{{ Load a filename and return a simplexml_element object to allow for processing */ PHP_FUNCTION(simplexml_load_file) { - php_sxe_object *sxe; char *filename; size_t filename_len; xmlDocPtr docp; zend_string *ns = zend_empty_string; zend_long options = 0; zend_class_entry *ce= ce_SimpleXMLElement; - zend_function *fptr_count; bool isprefix = 0; if (zend_parse_parameters(ZEND_NUM_ARGS(), "p|C!lSb", &filename, &filename_len, &ce, &options, &ns, &isprefix) == FAILURE) { @@ -2209,37 +2229,70 @@ PHP_FUNCTION(simplexml_load_file) docp = xmlReadFile(filename, NULL, (int)options); PHP_LIBXML_RESTORE_GLOBALS(read_file); - if (!docp) { - RETURN_FALSE; + sxe_create_obj_from_doc(return_value, docp, ce, ns, isprefix); +} +/* }}} */ + +static int sxe_stream_read(void *context, char *buffer, int len) +{ + zend_resource *resource = context; + if (EXPECTED(resource->ptr)) { + php_stream *stream = resource->ptr; + return php_stream_read(stream, buffer, len); } + return -1; +} - if (!ce) { - ce = ce_SimpleXMLElement; - fptr_count = NULL; - } else { - fptr_count = php_sxe_find_fptr_count(ce); +PHP_FUNCTION(simplexml_load_stream) +{ + zval *stream_zv; + php_stream *stream; + xmlDocPtr docp; + zend_string *ns = zend_empty_string; + zend_long options = 0; + zend_class_entry *ce = ce_SimpleXMLElement; + bool isprefix = 0; + const char *encoding = NULL; + const char *document_uri = NULL; + size_t encoding_len, document_uri_len; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "r|p!p!C!lSb", + &stream_zv, &encoding, &encoding_len, &document_uri, &document_uri_len, &ce, &options, &ns, &isprefix) == FAILURE) { + RETURN_THROWS(); } - sxe = php_sxe_object_new(ce, fptr_count); - sxe->iter.nsprefix = ZSTR_LEN(ns) ? 
zend_string_copy(ns) : NULL; - sxe->iter.isprefix = isprefix; - php_libxml_increment_doc_ref((php_libxml_node_object *)sxe, docp); - php_libxml_increment_node_ptr((php_libxml_node_object *)sxe, xmlDocGetRootElement(docp), NULL); - RETURN_OBJ(&sxe->zo); + php_stream_from_res(stream, Z_RES_P(stream_zv)); + + if (!php_libxml_is_valid_encoding(encoding)) { + zend_argument_value_error(2, "must be a valid character encoding"); + RETURN_THROWS(); + } + + if (ZEND_LONG_EXCEEDS_INT(options)) { + zend_argument_value_error(5, "is too large"); + RETURN_THROWS(); + } + + if (encoding) { + options |= XML_PARSE_IGNORE_ENC; + } + + PHP_LIBXML_SANITIZE_GLOBALS(read_file); + docp = xmlReadIO(sxe_stream_read, NULL, stream->res, document_uri, encoding, (int) options); + PHP_LIBXML_RESTORE_GLOBALS(read_file); + + sxe_create_obj_from_doc(return_value, docp, ce, ns, isprefix); } -/* }}} */ /* {{{ Load a string and return a simplexml_element object to allow for processing */ PHP_FUNCTION(simplexml_load_string) { - php_sxe_object *sxe; char *data; size_t data_len; xmlDocPtr docp; zend_string *ns = zend_empty_string; zend_long options = 0; zend_class_entry *ce= ce_SimpleXMLElement; - zend_function *fptr_count; bool isprefix = 0; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|C!lSb", &data, &data_len, &ce, &options, &ns, &isprefix) == FAILURE) { @@ -2263,23 +2316,7 @@ PHP_FUNCTION(simplexml_load_string) docp = xmlReadMemory(data, (int)data_len, NULL, NULL, (int)options); PHP_LIBXML_RESTORE_GLOBALS(read_memory); - if (!docp) { - RETURN_FALSE; - } - - if (!ce) { - ce = ce_SimpleXMLElement; - fptr_count = NULL; - } else { - fptr_count = php_sxe_find_fptr_count(ce); - } - sxe = php_sxe_object_new(ce, fptr_count); - sxe->iter.nsprefix = ZSTR_LEN(ns) ? 
zend_string_copy(ns) : NULL; - sxe->iter.isprefix = isprefix; - php_libxml_increment_doc_ref((php_libxml_node_object *)sxe, docp); - php_libxml_increment_node_ptr((php_libxml_node_object *)sxe, xmlDocGetRootElement(docp), NULL); - - RETURN_OBJ(&sxe->zo); + sxe_create_obj_from_doc(return_value, docp, ce, ns, isprefix); } /* }}} */ diff --git a/ext/simplexml/simplexml.stub.php b/ext/simplexml/simplexml.stub.php index 2053fec6fdd2e..7386b4fdc1c1b 100644 --- a/ext/simplexml/simplexml.stub.php +++ b/ext/simplexml/simplexml.stub.php @@ -4,6 +4,9 @@ function simplexml_load_file(string $filename, ?string $class_name = SimpleXMLElement::class, int $options = 0, string $namespace_or_prefix = "", bool $is_prefix = false): SimpleXMLElement|false {} +/** @param resource $stream */ +function simplexml_load_stream($stream, ?string $encoding = null, ?string $document_uri = null, ?string $class_name = SimpleXMLElement::class, int $options = 0, string $namespace_or_prefix = "", bool $is_prefix = false): SimpleXMLElement|false {} + function simplexml_load_string(string $data, ?string $class_name = SimpleXMLElement::class, int $options = 0, string $namespace_or_prefix = "", bool $is_prefix = false): SimpleXMLElement|false {} function simplexml_import_dom(object $node, ?string $class_name = SimpleXMLElement::class): ?SimpleXMLElement {} diff --git a/ext/simplexml/simplexml_arginfo.h b/ext/simplexml/simplexml_arginfo.h index 379307f953a10..5a13186b275af 100644 --- a/ext/simplexml/simplexml_arginfo.h +++ b/ext/simplexml/simplexml_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 36eac2dee86bcc386c24e2cc14caa7bd3d709e82 */ + * Stub hash: a238d5299e5c2d1cbf10c1270d294809af05d5eb */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_simplexml_load_file, 0, 1, SimpleXMLElement, MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0) @@ -9,6 +9,16 @@ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_simplexml_load_file, 0, 1, S ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, is_prefix, _IS_BOOL, 0, "false") ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_simplexml_load_stream, 0, 1, SimpleXMLElement, MAY_BE_FALSE) + ZEND_ARG_INFO(0, stream) + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, document_uri, IS_STRING, 1, "null") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, class_name, IS_STRING, 1, "SimpleXMLElement::class") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, namespace_or_prefix, IS_STRING, 0, "\"\"") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, is_prefix, _IS_BOOL, 0, "false") +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_simplexml_load_string, 0, 1, SimpleXMLElement, MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, data, IS_STRING, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, class_name, IS_STRING, 1, "SimpleXMLElement::class") @@ -101,6 +111,7 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_SimpleXMLElement_ ZEND_END_ARG_INFO() ZEND_FUNCTION(simplexml_load_file); +ZEND_FUNCTION(simplexml_load_stream); ZEND_FUNCTION(simplexml_load_string); ZEND_FUNCTION(simplexml_import_dom); ZEND_METHOD(SimpleXMLElement, xpath); @@ -126,6 +137,7 @@ ZEND_METHOD(SimpleXMLElement, getChildren); static const zend_function_entry ext_functions[] = { ZEND_FE(simplexml_load_file, arginfo_simplexml_load_file) + ZEND_FE(simplexml_load_stream, arginfo_simplexml_load_stream) ZEND_FE(simplexml_load_string, arginfo_simplexml_load_string) 
ZEND_FE(simplexml_import_dom, arginfo_simplexml_import_dom) ZEND_FE_END diff --git a/ext/simplexml/tests/simplexml_load_stream_broken.phpt b/ext/simplexml/tests/simplexml_load_stream_broken.phpt new file mode 100644 index 0000000000000..77f4436d5d2f9 --- /dev/null +++ b/ext/simplexml/tests/simplexml_load_stream_broken.phpt @@ -0,0 +1,51 @@ +--TEST-- +simplexml_load_stream() - from broken stream +--EXTENSIONS-- +simplexml +--FILE-- +first) { + $this->first = false; + return ""; + } + return false; + } + + public function stream_open(string $path, string $mode, int $options, ?string &$opened_path) { + return true; + } + + public function stream_close(): void { + } + + public function stream_eof(): bool { + return !$this->first; + } +} + +stream_wrapper_register("foo", MyStream::class); + +$tmp = fopen("foo://", "r"); +$sxe = simplexml_load_stream($tmp); +fclose($tmp); + +var_dump($sxe); + +?> +--EXPECTF-- +int(8192) +int(8192) +%A +Warning: simplexml_load_stream(): Entity: line 1: parser error : Premature end of data in tag root line 1 in %s on line %d + +Warning: simplexml_load_stream(): in %s on line %d + +Warning: simplexml_load_stream(): ^ in %s on line %d +bool(false) diff --git a/ext/simplexml/tests/simplexml_load_stream_errors.phpt b/ext/simplexml/tests/simplexml_load_stream_errors.phpt new file mode 100644 index 0000000000000..b863dfafe0744 --- /dev/null +++ b/ext/simplexml/tests/simplexml_load_stream_errors.phpt @@ -0,0 +1,18 @@ +--TEST-- +simplexml_load_stream() - errors +--EXTENSIONS-- +simplexml +--FILE-- +getMessage(), "\n"; +} +fclose($tmp); + +?> +--EXPECT-- +simplexml_load_stream(): Argument #2 ($encoding) must be a valid character encoding diff --git a/ext/simplexml/tests/simplexml_load_stream_memory.phpt b/ext/simplexml/tests/simplexml_load_stream_memory.phpt new file mode 100644 index 0000000000000..9a43a05495677 --- /dev/null +++ b/ext/simplexml/tests/simplexml_load_stream_memory.phpt @@ -0,0 +1,35 @@ +--TEST-- +simplexml_load_stream() - from 
memory stream +--EXTENSIONS-- +simplexml +--FILE-- +"); +rewind($tmp); +$sxe1 = simplexml_load_stream($tmp); +rewind($tmp); +$sxe2 = simplexml_load_stream($tmp, document_uri: 'http://example.com'); +fclose($tmp); + +var_dump($sxe1, $sxe2); + +?> +--EXPECTF-- +object(SimpleXMLElement)#%d (2) { + ["child1"]=> + object(SimpleXMLElement)#%d (0) { + } + ["child2"]=> + object(SimpleXMLElement)#%d (0) { + } +} +object(SimpleXMLElement)#%d (2) { + ["child1"]=> + object(SimpleXMLElement)#%d (0) { + } + ["child2"]=> + object(SimpleXMLElement)#%d (0) { + } +} diff --git a/ext/simplexml/tests/simplexml_load_stream_memory_with_encoding.phpt b/ext/simplexml/tests/simplexml_load_stream_memory_with_encoding.phpt new file mode 100644 index 0000000000000..54b717a622a8f --- /dev/null +++ b/ext/simplexml/tests/simplexml_load_stream_memory_with_encoding.phpt @@ -0,0 +1,27 @@ +--TEST-- +simplexml_load_stream() - from memory stream with encoding +--EXTENSIONS-- +simplexml +--FILE-- +ééé'); +rewind($tmp); +$sxe1 = simplexml_load_stream($tmp, encoding: 'UTF-8'); +rewind($tmp); +$sxe2 = simplexml_load_stream($tmp); +fclose($tmp); + +var_dump($sxe1, $sxe2); + +?> +--EXPECTF-- +object(SimpleXMLElement)#%d (1) { + [0]=> + string(6) "ééé" +} +object(SimpleXMLElement)#%d (1) { + [0]=> + string(18) "ﾃｩﾃｩﾃｩ" +}