diff --git a/llama.cpp b/llama.cpp index 23086da8f3df46..0fd9c12a0715bf 100644 --- a/llama.cpp +++ b/llama.cpp @@ -12540,11 +12540,11 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { if (lctx.inp_pos_bucket) { const int64_t n_tokens = batch.n_tokens; - + GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_pos_bucket->buffer)); - + int32_t * data = (int32_t *) lctx.inp_pos_bucket->data; - + if (!lctx.is_encoding) { const int64_t n_kv = kv_self.n; for (int h = 0; h < 1; ++h) {