From eb631980968a1cf5ed025549a730e0258743d959 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sun, 19 Jan 2025 05:29:20 -0700 Subject: [PATCH] Fixes to bytes_to_utf8-type functions This commit turns bytes_to_utf8() back into an (inline) function, and changes the type of a parameter in bytes_to_utf8_free_me() to void*, which is a more accurate type for it. Fixes #22902 --- embed.fnc | 4 ++-- embed.h | 2 +- inline.h | 7 ++++++- proto.h | 10 ++++++---- utf8.c | 4 ++-- utf8.h | 1 - 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/embed.fnc b/embed.fnc index 43b9102e8fdc..05f0e1879c66 100644 --- a/embed.fnc +++ b/embed.fnc @@ -794,12 +794,12 @@ Adp |int |bytes_cmp_utf8 |NN const U8 *b \ Adp |U8 * |bytes_from_utf8|NN const U8 *s \ |NN STRLEN *lenp \ |NN bool *is_utf8p -Admp |U8 * |bytes_to_utf8 |NN const U8 *s \ +Adip |U8 * |bytes_to_utf8 |NN const U8 *s \ |NN STRLEN *lenp Adp |U8 * |bytes_to_utf8_free_me \ |NN const U8 *s \ |NN STRLEN *lenp \ - |NULLOK const U8 **free_me + |NULLOK void **free_me AOdp |SSize_t|call_argv |NN const char *sub_name \ |I32 flags \ |NN char **argv diff --git a/embed.h b/embed.h index e4b4bb5e1cd9..c34e5fc27216 100644 --- a/embed.h +++ b/embed.h @@ -155,7 +155,7 @@ # define block_start(a) Perl_block_start(aTHX_ a) # define bytes_cmp_utf8(a,b,c,d) Perl_bytes_cmp_utf8(aTHX_ a,b,c,d) # define bytes_from_utf8(a,b,c) Perl_bytes_from_utf8(aTHX_ a,b,c) -# define bytes_to_utf8(a,b) Perl_bytes_to_utf8(aTHX,a,b) +# define bytes_to_utf8(a,b) Perl_bytes_to_utf8(aTHX_ a,b) # define bytes_to_utf8_free_me(a,b,c) Perl_bytes_to_utf8_free_me(aTHX_ a,b,c) # define c9strict_utf8_to_uv Perl_c9strict_utf8_to_uv # define call_argv(a,b,c) Perl_call_argv(aTHX_ a,b,c) diff --git a/inline.h b/inline.h index 5c2856060870..490e09da4fca 100644 --- a/inline.h +++ b/inline.h @@ -1132,7 +1132,6 @@ in lvalue context. 
=cut */ - PERL_STATIC_INLINE bool Perl_rpp_is_lone(pTHX_ SV *sv) { @@ -1231,6 +1230,12 @@ Perl_append_utf8_from_native_byte(const U8 byte, U8** dest) } } +PERL_STATIC_INLINE U8 * +Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *lenp) +{ + return bytes_to_utf8_free_me(s, lenp, NULL); +} + /* =for apidoc valid_utf8_to_uvchr Like C<L<perlapi/utf8_to_uvchr_buf>>, but should only be called when it is diff --git a/proto.h b/proto.h index 08ec30c5ce15..40a98587be69 100644 --- a/proto.h +++ b/proto.h @@ -398,11 +398,8 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *lenp, bool *is_utf8p); #define PERL_ARGS_ASSERT_BYTES_FROM_UTF8 \ assert(s); assert(lenp); assert(is_utf8p) -/* PERL_CALLCONV U8 * -Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *lenp); */ - PERL_CALLCONV U8 * -Perl_bytes_to_utf8_free_me(pTHX_ const U8 *s, STRLEN *lenp, const U8 **free_me); +Perl_bytes_to_utf8_free_me(pTHX_ const U8 *s, STRLEN *lenp, void **free_me); #define PERL_ARGS_ASSERT_BYTES_TO_UTF8_FREE_ME \ assert(s); assert(lenp) @@ -9651,6 +9648,11 @@ Perl_av_store_simple(pTHX_ AV *av, SSize_t key, SV *val); # define PERL_ARGS_ASSERT_AV_STORE_SIMPLE \ assert(av); assert(SvTYPE(av) == SVt_PVAV) +PERL_STATIC_INLINE U8 * +Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *lenp); +# define PERL_ARGS_ASSERT_BYTES_TO_UTF8 \ + assert(s); assert(lenp) + PERL_STATIC_INLINE void Perl_clear_defarray_simple(pTHX_ AV *av); # define PERL_ARGS_ASSERT_CLEAR_DEFARRAY_SIMPLE \ diff --git a/utf8.c b/utf8.c index 89f3f89df79a..3c5898e7f05e 100644 --- a/utf8.c +++ b/utf8.c @@ -3269,7 +3269,7 @@ But when it is a non-NULL pointer, C<bytes_to_utf8_free_me> stores into it either NULL if no memory was allocated; or a pointer to that new memory. This allows the following convenient paradigm: - U8 * free_me; + void * free_me; U8 converted = bytes_to_utf8_free_me(string, &len, &free_me); ... @@ -3292,7 +3292,7 @@ EBCDIC), see L</sv_recode_to_utf8>(). 
U8* Perl_bytes_to_utf8_free_me(pTHX_ const U8 *s, Size_t *lenp, - const U8 ** free_me_ptr) + void ** free_me_ptr) { PERL_ARGS_ASSERT_BYTES_TO_UTF8_FREE_ME; PERL_UNUSED_CONTEXT; diff --git a/utf8.h b/utf8.h index f95311637c34..6ed7c3304e4d 100644 --- a/utf8.h +++ b/utf8.h @@ -1330,7 +1330,6 @@ point's representation. #define Perl_is_utf8_char_buf(buf, buf_end) isUTF8_CHAR(buf, buf_end) -#define Perl_bytes_to_utf8(mTHX, s, lenp) Perl_bytes_to_utf8_free_me(aTHX_ s, lenp, NULL) typedef enum { PL_utf8_to_bytes_overwrite = 0, PL_utf8_to_bytes_new_memory,