From dad11fc6c65685ad6b7c204679ccd428a7488311 Mon Sep 17 00:00:00 2001
From: TAKAI Kousuke <62541129+t-a-k@users.noreply.github.com>
Date: Sat, 11 Jan 2025 01:17:31 +0900
Subject: [PATCH] utf8.c: Postpone pointer subtraction until it turns out to be
 safe

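In Perl_utf8_to_uv_msgs_helper_(), "curlen = send - s0;" used to be done
earlier in this function, before the two pointers had been compared.
However, "send >= s0" (that is, "e >= s0") does not necessarily hold
true, so this subtraction might underflow: a negative difference stored
in the unsigned Size_t "curlen" becomes a huge positive value, which the
subsequent "curlen <= 0" check cannot detect.  Compare the pointers
first, and perform the subtraction only once "s0 < send" is known.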

Thanks to @mauke and @tonycoz for pointing this out.
---
 utf8.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/utf8.c b/utf8.c
index f446f3c927db..a7b48994ac07 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1634,7 +1634,6 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
      * than a single character */
     const U8 * send = e;
 
-    Size_t curlen = send - s0;
     U32 possible_problems;  /* A bit is set here for each potential problem
                                found as we go along */
     UV uv = 0;
@@ -1723,11 +1722,13 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
      * allowed one, we could allow in something that shouldn't have been.
      */
 
-    if (UNLIKELY(curlen <= 0)) {
+    Size_t curlen;
+    if (UNLIKELY(s0 >= send)) {
         possible_problems |= UTF8_GOT_EMPTY;
         curlen = 0;
         goto ready_to_handle_errors;
     }
+    curlen = send - s0;
 
     /* We now know we can examine the first byte of the input */
     expectlen = UTF8SKIP(s0);