From a47cbf055ceac945450c5215205cd3425337698f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Sun, 22 Oct 2023 11:44:06 +0200 Subject: [PATCH 1/2] make `Memory` typed and avoid dealing with `Layout`s everywhere --- integer/src/div/divide_conquer.rs | 3 +- integer/src/div/mod.rs | 7 +- integer/src/gcd/lehmer.rs | 16 ++-- integer/src/gcd/mod.rs | 5 +- integer/src/gcd_ops.rs | 20 ++-- integer/src/memory.rs | 150 ++++++++++++------------------ integer/src/modular/mul.rs | 16 ++-- integer/src/modular/pow.rs | 7 +- integer/src/mul/karatsuba.rs | 12 +-- integer/src/mul/mod.rs | 9 +- integer/src/mul/toom_3.rs | 8 +- integer/src/pow.rs | 8 +- integer/src/root.rs | 12 +-- integer/src/sqr/mod.rs | 13 +-- 14 files changed, 109 insertions(+), 177 deletions(-) diff --git a/integer/src/div/divide_conquer.rs b/integer/src/div/divide_conquer.rs index 455ef8a..4f96d64 100644 --- a/integer/src/div/divide_conquer.rs +++ b/integer/src/div/divide_conquer.rs @@ -9,11 +9,10 @@ use crate::{ mul, Sign::*, }; -use alloc::alloc::Layout; use static_assertions::const_assert; /// Memory requirement for division. -pub fn memory_requirement_exact(lhs_len: usize, rhs_len: usize) -> Layout { +pub fn memory_requirement_exact(lhs_len: usize, rhs_len: usize) -> usize { assert!(lhs_len >= rhs_len); // We need space for multiplications summing up to rhs.len(), // and at most lhs_len - rhs_len long. diff --git a/integer/src/div/mod.rs b/integer/src/div/mod.rs index 2ef5731..fbcc2f1 100644 --- a/integer/src/div/mod.rs +++ b/integer/src/div/mod.rs @@ -4,11 +4,10 @@ use crate::{ arch::word::{DoubleWord, Word}, helper_macros::debug_assert_zero, math::{shl_dword, shr_word, FastDivideNormalized, FastDivideNormalized2}, - memory::{self, Memory}, + memory::Memory, primitive::{double_word, extend_word, highest_dword, lowest_dword, split_dword, WORD_BITS}, shift, }; -use alloc::alloc::Layout; mod divide_conquer; mod simple; @@ -231,10 +230,10 @@ pub(crate) fn fast_rem_by_normalized_dword( } /// Memory requirement for division. -pub fn memory_requirement_exact(lhs_len: usize, rhs_len: usize) -> Layout { +pub fn memory_requirement_exact(lhs_len: usize, rhs_len: usize) -> usize { assert!(lhs_len >= rhs_len && rhs_len >= 2); if rhs_len <= THRESHOLD_SIMPLE || lhs_len - rhs_len <= THRESHOLD_SIMPLE { - memory::zero_layout() + 0 } else { divide_conquer::memory_requirement_exact(lhs_len, rhs_len) } diff --git a/integer/src/gcd/lehmer.rs b/integer/src/gcd/lehmer.rs index 3d9eb7b..34c1c50 100644 --- a/integer/src/gcd/lehmer.rs +++ b/integer/src/gcd/lehmer.rs @@ -1,4 +1,3 @@ -use alloc::alloc::Layout; use core::{mem, ptr, slice}; use dashu_base::{ExtendedGcd, Gcd}; @@ -223,7 +222,7 @@ pub(crate) fn lehmer_step(x: &mut [Word], y: &mut [Word], a: Word, b: Word, c: W /// Temporary memory required for gcd. #[inline] -pub fn memory_requirement_up_to(lhs_len: usize, rhs_len: usize) -> Layout { +pub fn memory_requirement_up_to(lhs_len: usize, rhs_len: usize) -> usize { // Required memory: // - temporary space for the division in the euclidean step div::memory_requirement_exact(lhs_len, rhs_len) @@ -328,15 +327,14 @@ fn lehmer_ext_step( } /// Temporary memory required for extended gcd. -pub fn memory_requirement_ext_up_to(lhs_len: usize, rhs_len: usize) -> Layout { +pub fn memory_requirement_ext_up_to(lhs_len: usize, rhs_len: usize) -> usize { // Required memory: // - two numbers (t0 & t1) with at most the same size as lhs, add 1 buffer word // - temporary space for a division (for euclidean step), and later a mulitplication (for coeff update) let t_words = 2 * lhs_len + 2; - memory::add_layout( - memory::array_layout::(t_words), - memory::max_layout( - div::memory_requirement_exact(lhs_len, rhs_len), // + memory::add_capacity( + t_words, + div::memory_requirement_exact(lhs_len, rhs_len).max( mul::memory_requirement_up_to(lhs_len, lhs_len / 2), // for coeff update ), ) @@ -358,8 +356,8 @@ pub fn gcd_ext_in_place( // the normal way is to have four variables s0, s1, t0, t1 and keep gcd(x, y) = gcd(lhs, rhs), // x = s0*lhs - t0*rhs, y = t1*rhs - s1*lhs. Here we simplify it by only tracking the // coefficient of rhs, so that x = -t0*rhs mod lhs, y = t1*rhs mod lhs, - let (mut t0, mut memory) = memory.allocate_slice_fill::(lhs_len + 1, 0); - let (mut t1, mut memory) = memory.allocate_slice_fill::(lhs_len + 1, 0); + let (mut t0, mut memory) = memory.allocate_slice_fill(lhs_len + 1, 0); + let (mut t1, mut memory) = memory.allocate_slice_fill(lhs_len + 1, 0); let (mut t0_len, mut t1_len) = (1, 1); *t1.first_mut().unwrap() = 1; diff --git a/integer/src/gcd/mod.rs b/integer/src/gcd/mod.rs index 7cc0d16..a70e6e4 100644 --- a/integer/src/gcd/mod.rs +++ b/integer/src/gcd/mod.rs @@ -8,7 +8,6 @@ use crate::{ primitive::{extend_word, shrink_dword, PrimitiveSigned}, Sign, }; -use alloc::alloc::Layout; use dashu_base::ExtendedGcd; mod lehmer; @@ -30,7 +29,7 @@ pub fn gcd_in_place(lhs: &mut [Word], rhs: &mut [Word], memory: &mut Memory) -> } /// Memory requirement for GCD. -pub fn memory_requirement_exact(lhs_len: usize, rhs_len: usize) -> Layout { +pub fn memory_requirement_exact(lhs_len: usize, rhs_len: usize) -> usize { lehmer::memory_requirement_up_to(lhs_len, rhs_len) } @@ -53,7 +52,7 @@ pub fn gcd_ext_in_place( } /// Memory requirement for extended GCD. -pub fn memory_requirement_ext_exact(lhs_len: usize, rhs_len: usize) -> Layout { +pub fn memory_requirement_ext_exact(lhs_len: usize, rhs_len: usize) -> usize { lehmer::memory_requirement_ext_up_to(lhs_len, rhs_len) } diff --git a/integer/src/gcd_ops.rs b/integer/src/gcd_ops.rs index 00c2145..a36f4b8 100644 --- a/integer/src/gcd_ops.rs +++ b/integer/src/gcd_ops.rs @@ -295,21 +295,17 @@ mod repr { let (lhs_len, rhs_len) = (lhs.len(), rhs.len()); // allocate memory - let clone_mem = memory::array_layout::(lhs_len + rhs_len); + let clone_mem = memory::add_capacity(lhs_len, rhs_len); let gcd_mem = gcd::memory_requirement_ext_exact(lhs_len, rhs_len); - let post_mem = memory::add_layout( + let post_mem = memory::add_capacity( // temporary space to store residue - memory::array_layout::(lhs_len + rhs_len), - memory::max_layout( - // memory required for post processing: one multiplication + one division - mul::memory_requirement_exact(lhs_len + rhs_len, rhs_len), - div::memory_requirement_exact(lhs_len + rhs_len + 1, rhs_len), - ), - ); - let mut allocation = MemoryAllocation::new(memory::add_layout( clone_mem, - memory::max_layout(gcd_mem, post_mem), - )); + // memory required for post processing: one multiplication + one division + mul::memory_requirement_exact(clone_mem, rhs_len) + .max(div::memory_requirement_exact(memory::add_capacity(clone_mem, 1), rhs_len)), + ); + let mut allocation = + MemoryAllocation::new(memory::add_capacity(clone_mem, gcd_mem.max(post_mem))); let mut memory = allocation.memory(); // copy oprands for post processing diff --git a/integer/src/memory.rs b/integer/src/memory.rs index 4b87caa..c3906c0 100644 --- a/integer/src/memory.rs +++ b/integer/src/memory.rs @@ -1,76 +1,74 @@ //! Memory allocation. use crate::error::{panic_allocate_too_much, panic_out_of_memory}; +use crate::Word; use alloc::alloc::Layout; -use core::{fmt, marker::PhantomData, mem, slice}; +use core::{fmt, marker::PhantomData, slice}; /// Chunk of memory directly allocated from the global allocator. -pub struct MemoryAllocation { - layout: Layout, - start: *mut u8, +pub struct MemoryAllocation { + capacity: usize, + start: *mut T, } /// Chunk of memory. -pub struct Memory<'a> { +pub struct Memory<'a, T: Copy = Word> { /// Start pointer. - start: *mut u8, - /// End pointer. - end: *mut u8, + start: *mut T, + /// Capacity. + capacity: usize, /// Logically, Memory contains a reference to some data with lifetime 'a. - phantom_data: PhantomData<&'a mut ()>, + phantom_data: PhantomData<&'a mut T>, } -impl fmt::Debug for Memory<'_> { +impl fmt::Debug for Memory<'_, T> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("Memory chunk (")?; - // SAFETY: the safety here is guaranteed by the constructors of `Memory`. - let offset = unsafe { self.end.offset_from(self.start) }; - offset.fmt(f)?; - f.write_str(" bytes)") + write!(f, "Memory chunk ({} items)", self.capacity) } } -impl MemoryAllocation { +impl MemoryAllocation { /// Allocate memory. - pub fn new(layout: Layout) -> MemoryAllocation { - let start = if layout.size() == 0 { + pub fn new(capacity: usize) -> Self { + let start = if capacity == 0 { // We should use layout.dangling(), but that is unstable. - layout.align() as *mut u8 - } else if layout.size() > isize::MAX as usize { - panic_allocate_too_much() + core::ptr::NonNull::dangling().as_ptr() } else { + let layout = Layout::array::(capacity).unwrap_or_else(|_| panic_allocate_too_much()); // SAFETY: it's checked above that layout.size() != 0. let ptr = unsafe { alloc::alloc::alloc(layout) }; if ptr.is_null() { panic_out_of_memory(); } - ptr + ptr.cast() }; - MemoryAllocation { layout, start } + Self { capacity, start } } /// Get memory. #[inline] - pub fn memory(&mut self) -> Memory { + pub fn memory(&mut self) -> Memory { Memory { start: self.start, - end: self.start.wrapping_add(self.layout.size()), + capacity: self.capacity, phantom_data: PhantomData, } } } -impl Drop for MemoryAllocation { +impl Drop for MemoryAllocation { fn drop(&mut self) { - if self.layout.size() != 0 { + if self.capacity != 0 { // SAFETY: the memory was allocated with the same layout. - unsafe { alloc::alloc::dealloc(self.start, self.layout) }; + unsafe { + alloc::alloc::dealloc(self.start.cast(), Layout::array::(self.capacity).unwrap()) + }; } } } -impl Memory<'_> { +impl Memory<'_, T> { /// Allocate a slice with a given value. /// /// Returns the remaining chunk of memory. @@ -78,8 +76,8 @@ impl Memory<'_> { /// The original memory is not usable until both the new memory and the slice are dropped. /// /// The elements of the slice never get dropped! - pub fn allocate_slice_fill(&mut self, n: usize, val: T) -> (&mut [T], Memory) { - self.allocate_slice_initialize::(n, |ptr| { + pub fn allocate_slice_fill(&mut self, n: usize, val: T) -> (&mut [T], Memory<'_, T>) { + self.allocate_slice_initialize(n, |ptr| { for i in 0..n { // SAFETY: ptr is properly aligned and has enough space. unsafe { @@ -96,8 +94,8 @@ impl Memory<'_> { /// The original memory is not usable until both the new memory and the slice are dropped. /// /// The elements of the slice never get dropped! - pub fn allocate_slice_copy(&mut self, source: &[T]) -> (&mut [T], Memory) { - self.allocate_slice_initialize::(source.len(), |ptr| { + pub fn allocate_slice_copy(&mut self, source: &[T]) -> (&mut [T], Memory<'_, T>) { + self.allocate_slice_initialize(source.len(), |ptr| { for (i, v) in source.iter().enumerate() { // SAFETY: ptr is properly aligned and has enough space. unsafe { @@ -114,15 +112,15 @@ impl Memory<'_> { /// The original memory is not usable until both the new memory and the slice are dropped. /// /// The elements of the slice never get dropped! - pub fn allocate_slice_copy_fill( + pub fn allocate_slice_copy_fill( &mut self, n: usize, source: &[T], val: T, - ) -> (&mut [T], Memory) { + ) -> (&mut [T], Memory<'_, T>) { assert!(n >= source.len()); - self.allocate_slice_initialize::(n, |ptr| { + self.allocate_slice_initialize(n, |ptr| { for (i, v) in source.iter().enumerate() { // SAFETY: ptr is properly aligned and has enough space. unsafe { @@ -140,61 +138,41 @@ impl Memory<'_> { /// First allocate a slice of size n, and then initialize the memory with `F`. /// The initializer `F` must ensure that all allocated words are initialized. - fn allocate_slice_initialize(&mut self, n: usize, init: F) -> (&mut [T], Memory) + fn allocate_slice_initialize(&mut self, n: usize, init: F) -> (&mut [T], Memory<'_, T>) where F: FnOnce(*mut T), { #[allow(clippy::redundant_closure)] let (ptr, slice_end) = self - .try_find_memory_for_slice::(n) + .try_find_memory_for_slice(n) .expect("internal error: not enough memory allocated"); init(ptr); // SAFETY: ptr is properly sized and aligned guaranteed by `try_find_memory_for_slice`. let slice = unsafe { slice::from_raw_parts_mut(ptr, n) }; - let new_memory = Memory { + let new_memory = Self { start: slice_end, - end: self.end, + capacity: self.capacity - n, phantom_data: PhantomData, }; (slice, new_memory) } - fn try_find_memory_for_slice(&self, n: usize) -> Option<(*mut T, *mut u8)> { - let start = self.start as usize; - let end = self.end as usize; - - let padding = start.wrapping_neg() & (mem::align_of::() - 1); - let slice_start = start.checked_add(padding)?; - let size = n.checked_mul(mem::size_of::())?; - let slice_end = slice_start.checked_add(size)?; - if slice_end <= end { - Some((slice_start as *mut T, slice_end as *mut u8)) + fn try_find_memory_for_slice(&self, n: usize) -> Option<(*mut T, *mut T)> { + if n <= self.capacity { + // SAFETY: We just checked there is enough capacity + unsafe { Some((self.start, self.start.add(n))) } } else { None } } } -#[inline] -pub fn zero_layout() -> Layout { - Layout::from_size_align(0, 1).unwrap() -} - -pub fn array_layout(n: usize) -> Layout { - Layout::array::(n).unwrap_or_else(|_| panic_allocate_too_much()) -} - -pub fn add_layout(a: Layout, b: Layout) -> Layout { - let (layout, _padding) = a.extend(b).unwrap_or_else(|_| panic_allocate_too_much()); - layout -} - -pub fn max_layout(a: Layout, b: Layout) -> Layout { - Layout::from_size_align(a.size().max(b.size()), a.align().max(b.align())) - .unwrap_or_else(|_| panic_allocate_too_much()) +pub fn add_capacity(a: usize, b: usize) -> usize { + a.checked_add(b) + .unwrap_or_else(|| panic_allocate_too_much()) } #[cfg(test)] @@ -203,50 +181,36 @@ mod tests { #[test] fn test_memory() { - let mut scratchpad = MemoryAllocation::new(Layout::from_size_align(8, 4).unwrap()); + let mut scratchpad = MemoryAllocation::::new(2); let mut memory = scratchpad.memory(); - let (a, mut new_memory) = memory.allocate_slice_fill::(1, 3); + let (a, mut new_memory) = memory.allocate_slice_fill(1, 3); assert_eq!(a, &[3]); // Neither of these should compile: // let _ = scratchpad.memory(); - // let _ = memory.allocate_slice::(1, 3); - let (b, _) = new_memory.allocate_slice_fill::(1, 4); + // let _ = memory.allocate_slice(1, 3); + let (b, _) = new_memory.allocate_slice_fill(1, 4); assert_eq!(b, &[4]); // Now we can reuse the memory. - let (c, _) = memory.allocate_slice_copy::(&[4, 5]); + let (c, _) = memory.allocate_slice_copy(&[4, 5]); assert_eq!(c, &[4, 5]); // Reuse the memory again. - let (c, _) = memory.allocate_slice_copy_fill::(2, &[4], 7); + let (c, _) = memory.allocate_slice_copy_fill(2, &[4], 7); assert_eq!(c, &[4, 7]); } #[test] #[should_panic] fn test_memory_ran_out() { - let mut scratchpad = MemoryAllocation::new(Layout::from_size_align(8, 4).unwrap()); + let mut scratchpad = MemoryAllocation::::new(2); let mut memory = scratchpad.memory(); - let (a, mut new_memory) = memory.allocate_slice_fill::(1, 3); + let (a, mut new_memory) = memory.allocate_slice_fill(1, 3); assert_eq!(a, &[3]); - let _ = new_memory.allocate_slice_fill::(2, 4); - } - - #[test] - fn test_add_layout() { - let layout = add_layout( - Layout::from_size_align(1, 1).unwrap(), - Layout::from_size_align(8, 4).unwrap(), - ); - assert_eq!(layout.size(), 12); - assert_eq!(layout.align(), 4); + let _ = new_memory.allocate_slice_fill(2, 4); } #[test] - fn test_max_layout() { - let layout = max_layout( - Layout::from_size_align(100, 1).unwrap(), - Layout::from_size_align(8, 4).unwrap(), - ); - assert_eq!(layout.size(), 100); - assert_eq!(layout.align(), 4); + fn test_add_capacity() { + let capacity = add_capacity(1, 8); + assert_eq!(capacity, 9); } } diff --git a/integer/src/modular/mul.rs b/integer/src/modular/mul.rs index 495e878..2e3dbd4 100644 --- a/integer/src/modular/mul.rs +++ b/integer/src/modular/mul.rs @@ -11,7 +11,6 @@ use crate::{ primitive::{extend_word, locate_top_word_plus_one, split_dword}, shift, sqr, }; -use alloc::alloc::Layout; use core::ops::{Deref, Mul, MulAssign}; use num_modular::Reducer; @@ -115,14 +114,11 @@ impl<'a> Reduced<'a> { } } -pub(crate) fn mul_memory_requirement(ring: &ConstLargeDivisor) -> Layout { +pub(crate) fn mul_memory_requirement(ring: &ConstLargeDivisor) -> usize { let n = ring.normalized_divisor.len(); - memory::add_layout( - memory::array_layout::(2 * n), - memory::max_layout( - mul::memory_requirement_exact(2 * n, n), - div::memory_requirement_exact(2 * n, n), - ), + memory::add_capacity( + 2 * n, + mul::memory_requirement_exact(2 * n, n).max(div::memory_requirement_exact(2 * n, n)), ) } @@ -142,7 +138,7 @@ pub(crate) fn mul_normalized<'a>( let nb = locate_top_word_plus_one(b); // product = a * b - let (product, mut memory) = memory.allocate_slice_fill::(n.max(na + nb), 0); + let (product, mut memory) = memory.allocate_slice_fill(n.max(na + nb), 0); if na | nb == 0 { return product; } else if na == 1 && nb == 1 { @@ -198,7 +194,7 @@ pub(crate) fn sqr_normalized<'a>( let na = locate_top_word_plus_one(a); // product = a * a - let (product, mut memory) = memory.allocate_slice_fill::(n.max(na * 2), 0); + let (product, mut memory) = memory.allocate_slice_fill(n.max(na * 2), 0); if na == 0 { return product; } else if na == 1 { diff --git a/integer/src/modular/pow.rs b/integer/src/modular/pow.rs index c615250..9fe582f 100644 --- a/integer/src/modular/pow.rs +++ b/integer/src/modular/pow.rs @@ -140,13 +140,10 @@ mod large { .checked_mul(n) .unwrap_or_else(|| panic_allocate_too_much()); - let memory_requirement = memory::add_layout( - memory::array_layout::(table_words), - mul_memory_requirement(ring), - ); + let memory_requirement = memory::add_capacity(table_words, mul_memory_requirement(ring)); let mut allocation = MemoryAllocation::new(memory_requirement); let mut memory = allocation.memory(); - let (table, mut memory) = memory.allocate_slice_fill::(table_words, 0); + let (table, mut memory) = memory.allocate_slice_fill(table_words, 0); // val = raw^2 let mut val = raw.clone(); diff --git a/integer/src/mul/karatsuba.rs b/integer/src/mul/karatsuba.rs index f4dc2c0..538d0c5 100644 --- a/integer/src/mul/karatsuba.rs +++ b/integer/src/mul/karatsuba.rs @@ -5,11 +5,10 @@ use crate::{ arch::word::{SignedWord, Word}, helper_macros::debug_assert_zero, math, - memory::{self, Memory}, + memory::Memory, mul::{self, helpers}, Sign::{self, *}, }; -use alloc::alloc::Layout; // We must have 3 * floor((n+1)/2) <= 2n. // @@ -21,7 +20,7 @@ pub const MIN_LEN: usize = 3; /// Temporary memory required for multiplication. /// /// n bounds the length of the Smaller factor in words. -pub fn memory_requirement_up_to(n: usize) -> Layout { +pub fn memory_requirement_up_to(n: usize) -> usize { /* We prove by induction that: * f(n) <= 2n + 2 log_2 (n-1) * @@ -33,8 +32,7 @@ pub fn memory_requirement_up_to(n: usize) -> Layout { */ // Use 2n + 2 ceil log_2 n. - let num_words = 2 * n + 2 * (math::ceil_log2(n) as usize); - memory::array_layout::(num_words) + 2 * n + 2 * (math::ceil_log2(n) as usize) } /// c += sign * a * b @@ -91,7 +89,7 @@ pub fn add_signed_mul_same_len( { // c_0 += a_lo * b_lo // c_1 += a_lo * b_lo - let (c_lo, mut memory) = memory.allocate_slice_fill::(2 * mid, 0); + let (c_lo, mut memory) = memory.allocate_slice_fill(2 * mid, 0); debug_assert_zero!(mul::add_signed_mul_same_len(c_lo, Positive, a_lo, b_lo, &mut memory)); carry_c0 += add::add_signed_same_len_in_place(&mut c[..2 * mid], sign, c_lo); carry_c1 += add::add_signed_same_len_in_place(&mut c[mid..3 * mid], sign, c_lo); @@ -99,7 +97,7 @@ pub fn add_signed_mul_same_len( { // c_2 += a_hi * b_hi // c_1 += a_hi * b_hi - let (c_hi, mut memory) = memory.allocate_slice_fill::(2 * (n - mid), 0); + let (c_hi, mut memory) = memory.allocate_slice_fill(2 * (n - mid), 0); debug_assert_zero!(mul::add_signed_mul_same_len(c_hi, Positive, a_hi, b_hi, &mut memory)); carry += add::add_signed_same_len_in_place(&mut c[2 * mid..], sign, c_hi); carry_c1 += add::add_signed_in_place(&mut c[mid..3 * mid], sign, c_hi); diff --git a/integer/src/mul/mod.rs b/integer/src/mul/mod.rs index 1f2d2fb..116ff14 100644 --- a/integer/src/mul/mod.rs +++ b/integer/src/mul/mod.rs @@ -5,11 +5,10 @@ use crate::{ arch::word::{DoubleWord, SignedWord, Word}, helper_macros::debug_assert_zero, math, - memory::{self, Memory}, + memory::Memory, primitive::{double_word, extend_word, split_dword}, Sign, }; -use alloc::alloc::Layout; use core::mem; use static_assertions::const_assert; @@ -158,9 +157,9 @@ pub fn sub_mul_word_same_len_in_place(words: &mut [Word], mult: Word, rhs: &[Wor } /// Temporary scratch space required for multiplication. -pub fn memory_requirement_up_to(_total_len: usize, smaller_len: usize) -> Layout { +pub fn memory_requirement_up_to(_total_len: usize, smaller_len: usize) -> usize { if smaller_len <= THRESHOLD_SIMPLE { - memory::zero_layout() + 0 } else if smaller_len <= THRESHOLD_KARATSUBA { karatsuba::memory_requirement_up_to(smaller_len) } else { @@ -169,7 +168,7 @@ pub fn memory_requirement_up_to(_total_len: usize, smaller_len: usize) -> Layout } /// Temporary scratch space required for multiplication. -pub fn memory_requirement_exact(total_len: usize, smaller_len: usize) -> Layout { +pub fn memory_requirement_exact(total_len: usize, smaller_len: usize) -> usize { memory_requirement_up_to(total_len, smaller_len) } diff --git a/integer/src/mul/toom_3.rs b/integer/src/mul/toom_3.rs index 608dbe2..da904d5 100644 --- a/integer/src/mul/toom_3.rs +++ b/integer/src/mul/toom_3.rs @@ -6,12 +6,11 @@ use crate::{ div, helper_macros::debug_assert_zero, math, - memory::{self, Memory}, + memory::Memory, mul::{self, helpers}, shift, Sign::{self, *}, }; -use alloc::alloc::Layout; /* We must have: * 2 * (n+2) <= n @@ -31,7 +30,7 @@ pub const MIN_LEN: usize = 16; /// Temporary memory required for multiplication. /// /// n bounds the length of the Smaller factor in words. -pub fn memory_requirement_up_to(n: usize) -> Layout { +pub fn memory_requirement_up_to(n: usize) -> usize { /* In each level of recursion we use: * a_eval: n3 + 1 * b_eval: n3 + 1 @@ -54,8 +53,7 @@ pub fn memory_requirement_up_to(n: usize) -> Layout { // Note: the recurence also works when we transition to Karatsuba, because // Karatsuba memory requirements are Smaller. - let num_words = 4 * n + 13 * (math::ceil_log2(n) as usize); - memory::array_layout::(num_words) + 4 * n + 13 * (math::ceil_log2(n) as usize) } /// c += sign * a * b diff --git a/integer/src/pow.rs b/integer/src/pow.rs index bf65654..4f0284f 100644 --- a/integer/src/pow.rs +++ b/integer/src/pow.rs @@ -134,8 +134,8 @@ pub(crate) mod repr { let (exp, exp_rem) = exp.div_rem(wexp); let mut res = Buffer::allocate(exp + 1); // result is at most exp + 1 words let mut allocation = MemoryAllocation::new( - memory::add_layout( - memory::array_layout::(exp / 2 + 1), // store res before squaring + memory::add_capacity( + exp / 2 + 1, // store res before squaring sqr::memory_requirement_exact(exp / 2 + 1), ), // memory for squaring ); @@ -177,8 +177,8 @@ pub(crate) mod repr { let mut res = Buffer::allocate(2 * exp); // result is at most 2 * exp words let mut allocation = MemoryAllocation::new( - memory::add_layout( - memory::array_layout::(exp), // store res before squaring + memory::add_capacity( + exp, // store res before squaring sqr::memory_requirement_exact(exp), ), // memory for squaring ); diff --git a/integer/src/root.rs b/integer/src/root.rs index 549be75..5732919 100644 --- a/integer/src/root.rs +++ b/integer/src/root.rs @@ -3,25 +3,21 @@ use crate::{ arch::word::{DoubleWord, Word}, div, math::FastDivideNormalized2, - memory::{self, Memory}, + memory::Memory, mul::add_mul_word_in_place, primitive::{double_word, extend_word, highest_dword, split_dword, WORD_BITS}, shift::shr_in_place_with_carry, sqr, }; -use alloc::alloc::Layout; use dashu_base::{DivRem, SquareRootRem}; // n is the size of the output, or half the size of the input -pub fn memory_requirement_sqrt_rem(n: usize) -> Layout { +pub fn memory_requirement_sqrt_rem(n: usize) -> usize { if n == 2 { - memory::zero_layout() + 0 } else { // We need to perform a squaring with n words and an n by n/2 division - memory::max_layout( - sqr::memory_requirement_exact(n), - div::memory_requirement_exact(n, n - n / 2), - ) + sqr::memory_requirement_exact(n).max(div::memory_requirement_exact(n, n - n / 2)) } } diff --git a/integer/src/sqr/mod.rs b/integer/src/sqr/mod.rs index ac0068c..f1c6787 100644 --- a/integer/src/sqr/mod.rs +++ b/integer/src/sqr/mod.rs @@ -1,22 +1,15 @@ //! Square. -use alloc::alloc::Layout; - -use crate::{ - arch::word::Word, - helper_macros::debug_assert_zero, - memory::{self, Memory}, - mul, Sign, -}; +use crate::{arch::word::Word, helper_macros::debug_assert_zero, memory::Memory, mul, Sign}; mod simple; /// If operand length <= this, simple squaring will be used. const MAX_LEN_SIMPLE: usize = 30; -pub fn memory_requirement_exact(len: usize) -> Layout { +pub fn memory_requirement_exact(len: usize) -> usize { if len <= MAX_LEN_SIMPLE { - memory::zero_layout() + 0 } else { mul::memory_requirement_up_to(2 * len, len) } From d9a16b27c76b3d44e850d7f7b27b5097abe1a4a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Sun, 22 Oct 2023 12:07:55 +0200 Subject: [PATCH 2/2] refactor `Memory`/`MemoryAllocation` to avoid dealing with allocations and raw pointers --- integer/src/memory.rs | 118 ++++++++++++------------------------------ 1 file changed, 32 insertions(+), 86 deletions(-) diff --git a/integer/src/memory.rs b/integer/src/memory.rs index c3906c0..d79171e 100644 --- a/integer/src/memory.rs +++ b/integer/src/memory.rs @@ -1,69 +1,40 @@ //! Memory allocation. -use crate::error::{panic_allocate_too_much, panic_out_of_memory}; +use crate::error::panic_allocate_too_much; use crate::Word; -use alloc::alloc::Layout; -use core::{fmt, marker::PhantomData, slice}; +use alloc::vec::Vec; +use core::fmt; +use core::mem::{transmute, MaybeUninit}; /// Chunk of memory directly allocated from the global allocator. pub struct MemoryAllocation { - capacity: usize, - start: *mut T, + storage: Vec, } /// Chunk of memory. pub struct Memory<'a, T: Copy = Word> { - /// Start pointer. - start: *mut T, - /// Capacity. - capacity: usize, - /// Logically, Memory contains a reference to some data with lifetime 'a. - phantom_data: PhantomData<&'a mut T>, + slice: &'a mut [MaybeUninit], } impl fmt::Debug for Memory<'_, T> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "Memory chunk ({} items)", self.capacity) + write!(f, "Memory chunk ({} items)", self.slice.len()) } } impl MemoryAllocation { /// Allocate memory. pub fn new(capacity: usize) -> Self { - let start = if capacity == 0 { - // We should use layout.dangling(), but that is unstable. - core::ptr::NonNull::dangling().as_ptr() - } else { - let layout = Layout::array::(capacity).unwrap_or_else(|_| panic_allocate_too_much()); - // SAFETY: it's checked above that layout.size() != 0. - let ptr = unsafe { alloc::alloc::alloc(layout) }; - if ptr.is_null() { - panic_out_of_memory(); - } - ptr.cast() - }; - - Self { capacity, start } + Self { + storage: Vec::with_capacity(capacity), + } } /// Get memory. #[inline] pub fn memory(&mut self) -> Memory { Memory { - start: self.start, - capacity: self.capacity, - phantom_data: PhantomData, - } - } -} - -impl Drop for MemoryAllocation { - fn drop(&mut self) { - if self.capacity != 0 { - // SAFETY: the memory was allocated with the same layout. - unsafe { - alloc::alloc::dealloc(self.start.cast(), Layout::array::(self.capacity).unwrap()) - }; + slice: self.storage.spare_capacity_mut(), } } } @@ -77,13 +48,12 @@ impl Memory<'_, T> { /// /// The elements of the slice never get dropped! pub fn allocate_slice_fill(&mut self, n: usize, val: T) -> (&mut [T], Memory<'_, T>) { - self.allocate_slice_initialize(n, |ptr| { - for i in 0..n { - // SAFETY: ptr is properly aligned and has enough space. - unsafe { - ptr.add(i).write(val); - }; + self.allocate_slice_initialize(n, |slice| { + for item in slice.iter_mut() { + item.write(val); } + // SAFETY: Slice has just been initialized + unsafe { transmute(slice) } }) } @@ -95,13 +65,12 @@ impl Memory<'_, T> { /// /// The elements of the slice never get dropped! pub fn allocate_slice_copy(&mut self, source: &[T]) -> (&mut [T], Memory<'_, T>) { - self.allocate_slice_initialize(source.len(), |ptr| { - for (i, v) in source.iter().enumerate() { - // SAFETY: ptr is properly aligned and has enough space. - unsafe { - ptr.add(i).write(*v); - }; + self.allocate_slice_initialize(source.len(), |slice| { + for (item, &source) in slice.iter_mut().zip(source) { + item.write(source); } + // SAFETY: Slice has just been initialized + unsafe { transmute(slice) } }) } @@ -120,19 +89,15 @@ impl Memory<'_, T> { ) -> (&mut [T], Memory<'_, T>) { assert!(n >= source.len()); - self.allocate_slice_initialize(n, |ptr| { - for (i, v) in source.iter().enumerate() { - // SAFETY: ptr is properly aligned and has enough space. - unsafe { - ptr.add(i).write(*v); - }; + self.allocate_slice_initialize(n, |slice| { + for (item, &source) in slice.iter_mut().zip(source) { + item.write(source); } - for i in source.len()..n { - // SAFETY: ptr is properly aligned and has enough space. - unsafe { - ptr.add(i).write(val); - }; + for item in slice[source.len()..].iter_mut() { + item.write(val); } + // SAFETY: Slice has just been initialized + unsafe { transmute(slice) } }) } @@ -140,34 +105,15 @@ impl Memory<'_, T> { /// The initializer `F` must ensure that all allocated words are initialized. fn allocate_slice_initialize(&mut self, n: usize, init: F) -> (&mut [T], Memory<'_, T>) where - F: FnOnce(*mut T), + F: FnOnce(&mut [MaybeUninit]) -> &mut [T], { - #[allow(clippy::redundant_closure)] - let (ptr, slice_end) = self - .try_find_memory_for_slice(n) - .expect("internal error: not enough memory allocated"); - - init(ptr); + let (slice, remaining) = self.slice.split_at_mut(n); + let slice = init(slice); - // SAFETY: ptr is properly sized and aligned guaranteed by `try_find_memory_for_slice`. - let slice = unsafe { slice::from_raw_parts_mut(ptr, n) }; - let new_memory = Self { - start: slice_end, - capacity: self.capacity - n, - phantom_data: PhantomData, - }; + let new_memory = Memory { slice: remaining }; (slice, new_memory) } - - fn try_find_memory_for_slice(&self, n: usize) -> Option<(*mut T, *mut T)> { - if n <= self.capacity { - // SAFETY: We just checked there is enough capacity - unsafe { Some((self.start, self.start.add(n))) } - } else { - None - } - } } pub fn add_capacity(a: usize, b: usize) -> usize {