From e6f6fc5f7a358c95cbc90dbab5448584329e4ead Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Thu, 16 Jan 2025 23:00:47 +0000 Subject: [PATCH 1/3] Move S_uiv_2buf from sv.c to sv_inline.h This allows other parts of core to do integer stringification using the same, fast function. --- embed.fnc | 10 +++---- embed.h | 2 +- proto.h | 14 ++++----- sv.c | 82 ---------------------------------------------------- sv_inline.h | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 96 insertions(+), 95 deletions(-) diff --git a/embed.fnc b/embed.fnc index 43b9102e8fdc..35296a04f4eb 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3641,6 +3641,11 @@ EXop |bool |try_amagic_bin |int method \ |int flags EXop |bool |try_amagic_un |int method \ |int flags +ERTi |char * |uiv_2buf |NN char * const buf \ + |const IV iv \ + |UV uv \ + |const int is_uv \ + |NN char ** const peob Adp |SSize_t|unpackstring |NN const char *pat \ |NN const char *patend \ |NN const char *s \ @@ -5883,11 +5888,6 @@ ST |STRLEN |sv_pos_u2b_midway \ |const STRLEN uend i |void |sv_unglob |NN SV * const sv \ |U32 flags -RTi |char * |uiv_2buf |NN char * const buf \ - |const IV iv \ - |UV uv \ - |const int is_uv \ - |NN char ** const peob S |void |utf8_mg_len_cache_update \ |NN SV * const sv \ |NN MAGIC ** const mgp \ diff --git a/embed.h b/embed.h index e4b4bb5e1cd9..15ced07e1707 100644 --- a/embed.h +++ b/embed.h @@ -1855,6 +1855,7 @@ # define skipspace_flags(a,b) Perl_skipspace_flags(aTHX_ a,b) # define sv_magicext_mglob(a) Perl_sv_magicext_mglob(aTHX_ a) # define sv_only_taint_gmagic Perl_sv_only_taint_gmagic +# define uiv_2buf S_uiv_2buf # define utf16_to_utf8_base(a,b,c,d,e,f) Perl_utf16_to_utf8_base(aTHX_ a,b,c,d,e,f) # define utf8_to_utf16_base(a,b,c,d,e,f) Perl_utf8_to_utf16_base(aTHX_ a,b,c,d,e,f) # define validate_proto(a,b,c,d) Perl_validate_proto(aTHX_ a,b,c,d) @@ -2178,7 +2179,6 @@ # define sv_pos_u2b_forwards S_sv_pos_u2b_forwards # define sv_pos_u2b_midway S_sv_pos_u2b_midway # define sv_unglob(a,b) S_sv_unglob(aTHX_ a,b) -# define uiv_2buf S_uiv_2buf # define utf8_mg_len_cache_update(a,b,c) S_utf8_mg_len_cache_update(aTHX_ a,b,c) # define utf8_mg_pos_cache_update(a,b,c,d,e) S_utf8_mg_pos_cache_update(aTHX_ a,b,c,d,e) # define visit(a,b,c) S_visit(aTHX_ a,b,c) diff --git a/proto.h b/proto.h index 08ec30c5ce15..fd74d06733eb 100644 --- a/proto.h +++ b/proto.h @@ -9083,13 +9083,7 @@ S_sv_unglob(pTHX_ SV * const sv, U32 flags); # define PERL_ARGS_ASSERT_SV_UNGLOB \ assert(sv) -PERL_STATIC_INLINE char * -S_uiv_2buf(char * const buf, const IV iv, UV uv, const int is_uv, char ** const peob) - __attribute__warn_unused_result__; -# define PERL_ARGS_ASSERT_UIV_2BUF \ - assert(buf); assert(peob) - -# endif /* !defined(PERL_NO_INLINE_FUNCTIONS) */ +# endif # if defined(USE_ITHREADS) STATIC SV * S_sv_dup_common(pTHX_ const SV * const ssv, CLONE_PARAMS * const param) @@ -10007,6 +10001,12 @@ Perl_switch_argstack(pTHX_ AV *to); # define PERL_ARGS_ASSERT_SWITCH_ARGSTACK \ assert(to); assert(SvTYPE(to) == SVt_PVAV) +PERL_STATIC_INLINE char * +S_uiv_2buf(char * const buf, const IV iv, UV uv, const int is_uv, char ** const peob) + __attribute__warn_unused_result__; +# define PERL_ARGS_ASSERT_UIV_2BUF \ + assert(buf); assert(peob) + PERL_STATIC_INLINE IV Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b) __attribute__warn_unused_result__; diff --git a/sv.c b/sv.c index 36131608315e..5a4c05e82055 100644 --- a/sv.c +++ b/sv.c @@ -2766,88 +2766,6 @@ Perl_sv_2num(pTHX_ SV *const sv) return sv_2mortal(newSVuv(PTR2UV(SvRV(sv)))); } -/* int2str_table: lookup table containing string representations of all - * two digit numbers. For example, int2str_table.arr[0] is "00" and - * int2str_table.arr[12*2] is "12". - * - * We are going to read two bytes at a time, so we have to ensure that - * the array is aligned to a 2 byte boundary. That's why it was made a - * union with a dummy U16 member. */ -static const union { - char arr[200]; - U16 dummy; -} int2str_table = {{ - '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', - '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3', - '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', - '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', - '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', - '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1', - '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', - '4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', - '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2', - '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', - '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', - '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3', - '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0', - '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', - '9', '8', '9', '9' -}}; - -/* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or - * UV as a string towards the end of buf, and return pointers to start and - * end of it. - * - * We assume that buf is at least TYPE_CHARS(UV) long. - */ - -PERL_STATIC_INLINE char * -S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob) -{ - char *ptr = buf + TYPE_CHARS(UV); - char * const ebuf = ptr; - int sign; - U16 *word_ptr, *word_table; - - PERL_ARGS_ASSERT_UIV_2BUF; - - /* ptr has to be properly aligned, because we will cast it to U16* */ - assert(PTR2nat(ptr) % 2 == 0); - /* we are going to read/write two bytes at a time */ - word_ptr = (U16*)ptr; - word_table = (U16*)int2str_table.arr; - - if (UNLIKELY(is_uv)) - sign = 0; - else if (iv >= 0) { - uv = iv; - sign = 0; - } else { - /* Using 0- here to silence bogus warning from MS VC */ - uv = (UV) (0 - (UV) iv); - sign = 1; - } - - while (uv > 99) { - *--word_ptr = word_table[uv % 100]; - uv /= 100; - } - ptr = (char*)word_ptr; - - if (uv < 10) - *--ptr = (char)uv + '0'; - else { - *--word_ptr = word_table[uv]; - ptr = (char*)word_ptr; - } - - if (sign) - *--ptr = '-'; - - *peob = ebuf; - return ptr; -} - /* Helper for sv_2pv_flags and sv_vcatpvfn_flags. If the NV is an * infinity or a not-a-number, writes the appropriate strings to the * buffer, including a zero byte. On success returns the written length, diff --git a/sv_inline.h b/sv_inline.h index 57a68796cb02..db399c1b4d3c 100644 --- a/sv_inline.h +++ b/sv_inline.h @@ -1003,6 +1003,89 @@ Perl_sv_setpv_freshbuf(pTHX_ SV *const sv) return SvPVX(sv); } +/* int2str_table: lookup table containing string representations of all + * two digit numbers. For example, int2str_table.arr[0] is "00" and + * int2str_table.arr[12*2] is "12". + * + * We are going to read two bytes at a time, so we have to ensure that + * the array is aligned to a 2 byte boundary. That's why it was made a + * union with a dummy U16 member. */ +static const union { + char arr[200]; + U16 dummy; +} int2str_table = {{ + '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', + '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3', + '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', + '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', + '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', + '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1', + '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', + '4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', + '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2', + '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', + '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', + '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3', + '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0', + '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', + '9', '8', '9', '9' +}}; + +/* uiv_2buf() was originally a private routine in sv.c for use by + * sv_2pv_flags(), but its usefulness elsewhere was noted, and it was + * moved out here. It prints an IV or UV as a string towards the end + * of buf, and return pointers to start and end of it. + * + * We assume that buf is at least TYPE_CHARS(UV) long. + */ + +PERL_STATIC_INLINE char * +S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob) +{ + char *ptr = buf + TYPE_CHARS(UV); + char * const ebuf = ptr; + int sign; + U16 *word_ptr, *word_table; + + PERL_ARGS_ASSERT_UIV_2BUF; + + /* ptr has to be properly aligned, because we will cast it to U16* */ + assert(PTR2nat(ptr) % 2 == 0); + /* we are going to read/write two bytes at a time */ + word_ptr = (U16*)ptr; + word_table = (U16*)int2str_table.arr; + + if (UNLIKELY(is_uv)) + sign = 0; + else if (iv >= 0) { + uv = iv; + sign = 0; + } else { + /* Using 0- here to silence bogus warning from MS VC */ + uv = (UV) (0 - (UV) iv); + sign = 1; + } + + while (uv > 99) { + *--word_ptr = word_table[uv % 100]; + uv /= 100; + } + ptr = (char*)word_ptr; + + if (uv < 10) + *--ptr = (char)uv + '0'; + else { + *--word_ptr = word_table[uv]; + ptr = (char*)word_ptr; + } + + if (sign) + *--ptr = '-'; + + *peob = ebuf; + return ptr; +} + /* * ex: set ts=8 sts=4 sw=4 et: */ From 84c6a19a1a16dbdef835b7cf30793f58e268a099 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Fri, 17 Jan 2025 22:25:38 +0000 Subject: [PATCH 2/3] Perl_do_print: use S_uiv_2buf for faster IV stringification `Perl_do_print`'s pre-existing method for stringification of an IV within an SVt_IV involves creating a temporary SVt_PV, using `sv_vcatpvfn_flags` to do the stringification, then freeing the SVt_PV once the buffer has been written out. This is considerably slower than using `S_uiv_2buf`, the helper function used by `sv_2pv_flags`. So this commit modifies `Perl_do_print` to use `sv_2pv_flags`. --- doio.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/doio.c b/doio.c index e5a6cade7765..a9f26818516e 100644 --- a/doio.c +++ b/doio.c @@ -2205,11 +2205,25 @@ Perl_do_print(pTHX_ SV *sv, PerlIO *fp) return TRUE; if (SvTYPE(sv) == SVt_IV && SvIOK(sv)) { assert(!SvGMAGICAL(sv)); - if (SvIsUV(sv)) - PerlIO_printf(fp, "%" UVuf, (UV)SvUVX(sv)); - else - PerlIO_printf(fp, "%" IVdf, (IV)SvIVX(sv)); - return !PerlIO_error(fp); + bool happy = TRUE; + + /* Adapted from Perl_sv_2pv_flags */ + const U32 isUIOK = SvIsUV(sv); + /* The purpose of this union is to ensure that arr is aligned on + a 2 byte boundary, because that is what uiv_2buf() requires */ + union { + char arr[TYPE_CHARS(UV)]; + U16 dummy; + } buf; + char *ebuf, *ptr; + STRLEN len; + UV tempuv = SvUVX(sv); + ptr = uiv_2buf(buf.arr, SvIVX(sv), tempuv, isUIOK, &ebuf); + len = ebuf - ptr; + + if (len && (PerlIO_write(fp,ptr,len) == 0)) + happy = FALSE; + return happy ? !PerlIO_error(fp) : FALSE; } else { STRLEN len; From 27a35a214b96166d104e5518d99d101d5fba9e8f Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Fri, 17 Jan 2025 22:54:09 +0000 Subject: [PATCH 3/3] Perldelta for integer stringification change in Perl_do_print --- pod/perldelta.pod | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 4edf293d0952..21fa42b5afab 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -118,6 +118,12 @@ patterns and swaps in a new dedicated operator (C). =item * +The stringification of integers by L and L, +when coming from an SVt_IV, is now more efficient. +[GH #XXXXX] + +=item * + XXX =back