From 87b7dd23226e7a82ae0a1135336ea8bb28382e08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Szymczyk?= Date: Tue, 25 Jun 2024 17:36:42 +0200 Subject: [PATCH] llama : replace allocated precompiled_charsmap buffer with std::vector to avoid memory leak --- llama.cpp | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/llama.cpp b/llama.cpp index d7050894250fd..32683f2a6b34f 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2433,8 +2433,7 @@ struct llama_vocab { bool tokenizer_escape_whitespaces = true; bool tokenizer_treat_whitespace_as_suffix = false; - uint32_t n_precompiled_charsmap = 0; - char * precompiled_charsmap = NULL; + std::vector precompiled_charsmap; int find_bpe_rank(const std::string & token_left, const std::string & token_right) const { GGML_ASSERT(token_left.find(' ') == std::string::npos); @@ -4974,9 +4973,20 @@ static void llm_load_vocab( const int precompiled_charsmap_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP).c_str()); if (precompiled_charsmap_keyidx != -1) { - vocab.n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx); - vocab.precompiled_charsmap = (char *) malloc(vocab.n_precompiled_charsmap); - memcpy((void*) vocab.precompiled_charsmap, gguf_get_arr_data(ctx, precompiled_charsmap_keyidx), vocab.n_precompiled_charsmap); + size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx); + const char * precompiled_charsmap = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx); + vocab.precompiled_charsmap.assign(precompiled_charsmap, precompiled_charsmap + n_precompiled_charsmap); +#ifdef IS_BIG_ENDIAN + // correct endiannes of data in precompiled_charsmap binary blob + uint32_t * xcda_blob_size = (uint32_t *) &vocab.precompiled_charsmap[0]; + *xcda_blob_size = __builtin_bswap32(*xcda_blob_size); + assert(*xcda_blob_size + sizeof(uint32_t) < n_precompiled_charsmap); + size_t xcda_array_size = *xcda_blob_size / sizeof(uint32_t); + uint32_t * xcda_array = (uint32_t *) &vocab.precompiled_charsmap[sizeof(uint32_t)]; + for (size_t i = 0; i < xcda_array_size; ++i) { + xcda_array[i] = __builtin_bswap32(xcda_array[i]); + } +#endif } } else { throw std::runtime_error(format("unknown tokenizer: '%s'", tokenizer_model.c_str())); @@ -13990,33 +14000,27 @@ struct naive_trie { struct llm_tokenizer_ugm { llm_tokenizer_ugm(const llama_vocab & vocab) : vocab(vocab) { - if (vocab.n_precompiled_charsmap > 0) { + if (vocab.precompiled_charsmap.size() > 0) { size_t charsmap_offset = 0; // First four bytes of precompiled_charsmap contains length of binary // blob containing XOR-compressed compact double array (XCDA) entries - uint32_t xcda_blob_size = *(uint32_t *) vocab.precompiled_charsmap; -#ifdef IS_BIG_ENDIAN - xcda_blob_size = __builtin_bswap32(xcda_blob_size); -#endif + uint32_t xcda_blob_size = *(const uint32_t *) &vocab.precompiled_charsmap[0]; charsmap_offset += sizeof(xcda_blob_size); - if (xcda_blob_size + charsmap_offset >= vocab.n_precompiled_charsmap) { + if (xcda_blob_size + charsmap_offset >= vocab.precompiled_charsmap.size()) { throw std::runtime_error("Index out of array bounds in precompiled charsmap!"); } // Next xcda_blob_size bytes contain entries of XOR-compressed compact // double array (XCDA). Each entry is bit-packed into a 32-bit integer. - xcda_array = (uint32_t *) (vocab.precompiled_charsmap + charsmap_offset); + xcda_array = (const uint32_t *) &vocab.precompiled_charsmap[charsmap_offset]; xcda_array_size = xcda_blob_size / sizeof(uint32_t); -#ifdef IS_BIG_ENDIAN - for (int i = 0; i < xcda_array_size; ++i) xcda_array[i] = __builtin_bswap32(xcda_array[i]); -#endif charsmap_offset += xcda_blob_size; // Remaining bytes of precompiled charsmap contain null-terminated // replacement strings for prefixes matched by the XCDA. - prefix_replacements = vocab.precompiled_charsmap + charsmap_offset; - prefix_replacements_size = vocab.n_precompiled_charsmap - charsmap_offset; + prefix_replacements = &vocab.precompiled_charsmap[charsmap_offset]; + prefix_replacements_size = vocab.precompiled_charsmap.size() - charsmap_offset; } for (unsigned int id = 0; id < vocab.id_to_token.size(); ++id) { @@ -14201,7 +14205,7 @@ struct llm_tokenizer_ugm { */ struct xcda_array_view { public: - xcda_array_view(uint32_t * xcda_array, size_t xcda_array_size) : xcda_array(xcda_array), xcda_array_size(xcda_array_size) { + xcda_array_view(const uint32_t * xcda_array, size_t xcda_array_size) : xcda_array(xcda_array), xcda_array_size(xcda_array_size) { } uint32_t get_base(size_t index) { uint32_t packed_node = get_node(index); @@ -14226,7 +14230,7 @@ struct llm_tokenizer_ugm { } return xcda_array[index]; } - uint32_t * xcda_array; + const uint32_t * xcda_array; size_t xcda_array_size; }; @@ -14303,10 +14307,10 @@ struct llm_tokenizer_ugm { // escaped space symbol - U+2581 (Lower One Eighth Block) const std::string escaped_space = "\xE2\x96\x81"; - char * prefix_replacements = NULL; + const char * prefix_replacements = NULL; size_t prefix_replacements_size = 0; - uint32_t * xcda_array = NULL; + const uint32_t * xcda_array = NULL; size_t xcda_array_size = 0; struct naive_trie user_defined_token_matcher;