From feaacee57edf24ccd3a25bed0d9b401dc0aea400 Mon Sep 17 00:00:00 2001 From: Jeppe Dakin Date: Sun, 10 Nov 2024 14:50:25 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix=20buggy=20usage=20of=20non-c?= =?UTF-8?q?ontiguous=20slabs=20(#451)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mesh.py | 50 +++++++------------------------------------------ src/snapshot.py | 30 ++++++++++++++--------------- 2 files changed, 22 insertions(+), 58 deletions(-) diff --git a/src/mesh.py b/src/mesh.py index bebd5699..de41525d 100644 --- a/src/mesh.py +++ b/src/mesh.py @@ -2249,7 +2249,6 @@ def domain_decompose( grid='double[:, :, ::1]', slab_or_buffer_name=object, # double[:, :, ::1], int or str prepare_fft='bint', - trim='bint', # Locals N_domain2slabs_communications='Py_ssize_t', buffer_name=object, # int or str @@ -2273,7 +2272,6 @@ def domain_decompose( should_recv='bint', slab='double[:, :, ::1]', slab_arr=object, - slab_maybe_trimmed='double[:, :, ::1]', slab_sendrecv_j_end='int[::1]', slab_sendrecv_j_start='int[::1]', slab_sendrecv_k_end='int[::1]', @@ -2283,7 +2281,7 @@ def domain_decompose( ℓ='Py_ssize_t', returns='double[:, :, ::1]', ) -def slab_decompose(grid, slab_or_buffer_name=None, prepare_fft=False, trim=False): +def slab_decompose(grid, slab_or_buffer_name=None, prepare_fft=False): """This function communicates a global domain decomposed grid into a global slab decomposed grid. If an existing slab grid should be used it can be passed as the second argument. @@ -2351,14 +2349,9 @@ def slab_decompose(grid, slab_or_buffer_name=None, prepare_fft=False, trim=False slab_sendrecv_k_start, slab_sendrecv_k_end, ) = prepare_decomposition(grid, slab) - # Trim the slab if requested. - # All further operations work on both full and trimmed slabs. - slab_maybe_trimmed = slab - if trim: - slab_maybe_trimmed = trim_slab(slab) # Communicate the domain grid to the slabs in chunks n_chunks, thickness_chunk = get_slab_domain_decomposition_chunk_size( - slab_maybe_trimmed, grid_noghosts, + slab, grid_noghosts, ) for ℓ in range(N_domain2slabs_communications): should_send = (ℓ < ℤ[slabs2domain_sendrecv_ranks.shape[0]]) @@ -2398,10 +2391,10 @@ def slab_decompose(grid, slab_or_buffer_name=None, prepare_fft=False, trim=False if should_recv: chunk_recv_bgn = i_chunk*thickness_chunk chunk_recv_end = chunk_recv_bgn + thickness_chunk - if chunk_recv_end > ℤ[slab_maybe_trimmed.shape[0]]: - chunk_recv_end = ℤ[slab_maybe_trimmed.shape[0]] + if chunk_recv_end > ℤ[slab.shape[0]]: + chunk_recv_end = ℤ[slab.shape[0]] smart_mpi( - slab_maybe_trimmed[ + slab[ chunk_recv_bgn:chunk_recv_end, ℤ[slab_sendrecv_j_start[ℓ]]:ℤ[slab_sendrecv_j_end[ℓ]], ℤ[slab_sendrecv_k_start[ℓ]]:ℤ[slab_sendrecv_k_end[ℓ]], @@ -2415,7 +2408,7 @@ def slab_decompose(grid, slab_or_buffer_name=None, prepare_fft=False, trim=False # overwritten by the next (non-blocking) send. if should_send: request.wait() - return slab_maybe_trimmed + return slab # Helper function for slab and domain decomposition @cython.header( @@ -3748,30 +3741,6 @@ def fill_slab_padding(slab, value): for index, i, j, k in slab_loop(gridsize, skip_data=True): slab_ptr[index] = value -# Function for trimming away the padded elements of slabs -@cython.header( - # Arguments - slab='double[:, :, ::1]', - # Locals - arr=object, # np.ndarray - gridsize='Py_ssize_t', - slab_trimmed='double[:, :, ::1]', - returns='double[:, :, ::1]', -) -def trim_slab(slab): - """Note that trimmed slabs may not be used with FFTW""" - arr = asarray(slab) - shape = arr.shape - gridsize = slab.shape[1] - if shape != get_slabshape_local(gridsize): - abort(f'trim_slab() got slab of erroneous shape {shape}') - slab_trimmed = ( - arr.reshape(ℤ[slab.shape[0]*slab.shape[1]]*slab.shape[2]) - [:ℤ[slab.shape[0]*slab.shape[1]]*ℤ[slab.shape[2] - 2]] - .reshape((slab.shape[0], slab.shape[1], ℤ[slab.shape[2] - 2])) - ) - return slab_trimmed - # Function that returns a slab decomposed grid, # allocated by FFTW. @cython.pheader( @@ -3779,7 +3748,6 @@ def trim_slab(slab): gridsize='Py_ssize_t', buffer_name=object, # int or str or None nullify=object, # bint, str or list of str's - trim='bint', # Locals acquire='bint', as_expected='bint', @@ -3798,7 +3766,7 @@ def trim_slab(slab): wisdom_filename=str, returns='double[:, :, ::1]', ) -def get_fftw_slab(gridsize, buffer_name=None, nullify=False, trim=False): +def get_fftw_slab(gridsize, buffer_name=None, nullify=False): global fftw_plans_size, fftw_plans_forward, fftw_plans_backward if buffer_name is None: buffer_name = 'slab_global' @@ -3806,8 +3774,6 @@ def get_fftw_slab(gridsize, buffer_name=None, nullify=False, trim=False): slab = slabs.get((gridsize, buffer_name)) if slab is not None: nullify_modes(slab, nullify) - if trim: - return trim_slab(slab) return slab # Checks on the passed gridsize if gridsize%nprocs != 0: @@ -3897,8 +3863,6 @@ def get_fftw_slab(gridsize, buffer_name=None, nullify=False, trim=False): # Store and return this slab slabs[gridsize, buffer_name] = slab nullify_modes(slab, nullify) - if trim: - return trim_slab(slab) return slab # Cache storing slabs. The keys have the format (gridsize, buffer_name). cython.declare(slabs=dict) diff --git a/src/snapshot.py b/src/snapshot.py index 897eff24..14447d1e 100644 --- a/src/snapshot.py +++ b/src/snapshot.py @@ -121,9 +121,9 @@ def __init__(self): name=object, # str or int plural=str, shape=tuple, + slab='double[:, :, ::1]', slab_end='Py_ssize_t', slab_start='Py_ssize_t', - slab_trimmed='double[:, :, ::1]', start_local='Py_ssize_t', returns=str, ) @@ -245,14 +245,14 @@ def save(self, filename, save_all=False): # slab decomposed. Here we communicate the # fluid scalar to slabs before saving to # disk, improving performance enormously. - slab_trimmed = slab_decompose(fluidscalar.grid_mv, trim=True) - slab_start = ℤ[slab_trimmed.shape[0]]*rank - slab_end = slab_start + ℤ[slab_trimmed.shape[0]] + slab = slab_decompose(fluidscalar.grid_mv) + slab_start = slab.shape[0]*rank + slab_end = slab_start + slab.shape[0] fluidscalar_h5[ slab_start:slab_end, :, :, - ] = slab_trimmed[:, :, :] + ] = slab[:, :, :(slab.shape[2] - 2)] # exclude padding # Create additional names (hard links) for the fluid # groups and data sets. The names from # component.fluid_names will be used, except for @@ -328,8 +328,8 @@ def save(self, filename, save_all=False): pos='double*', representation=str, size='Py_ssize_t', + slab='double[:, :, ::1]', slab_start='Py_ssize_t', - slab_trimmed='double[:, :, ::1]', snapshot_unit_length='double', snapshot_unit_mass='double', snapshot_unit_time='double', @@ -544,14 +544,14 @@ def load(self, filename, only_params=False): fluidvar_h5 = component_h5[f'fluidvar_{index}'] for multi_index in fluidvar.multi_indices: fluidscalar_h5 = fluidvar_h5[f'fluidscalar_{multi_index}'] - slab_trimmed = get_fftw_slab(gridsize, trim=True) - slab_start = ℤ[slab_trimmed.shape[0]]*rank + slab = get_fftw_slab(gridsize) + slab_start = ℤ[slab.shape[0]]*rank # Load in using chunks. Large chunks are # fine as no temporary buffer is used. The # maximum possible chunk size is limited # by MPI, though. chunk_size = np.min(( - ℤ[slab_trimmed.shape[0]], + ℤ[slab.shape[0]], ℤ[self.chunk_size_max//8//gridsize**2], )) if chunk_size == 0: @@ -560,10 +560,10 @@ def load(self, filename, only_params=False): 'and may not be read in correctly' ) chunk_size = 1 - arr = asarray(slab_trimmed) - for index_i in range(0, ℤ[slab_trimmed.shape[0]], chunk_size): - if index_i + chunk_size > ℤ[slab_trimmed.shape[0]]: - chunk_size = ℤ[slab_trimmed.shape[0]] - index_i + arr = asarray(slab) + for index_i in range(0, ℤ[slab.shape[0]], chunk_size): + if index_i + chunk_size > ℤ[slab.shape[0]]: + chunk_size = ℤ[slab.shape[0]] - index_i index_i_file = slab_start + index_i source_sel = ( slice(index_i_file, index_i_file + chunk_size), @@ -573,7 +573,7 @@ def load(self, filename, only_params=False): dest_sel = ( slice(index_i, index_i + chunk_size), slice(None), - slice(None), + slice(0, slab.shape[2] - 2), # exclude padding ) fluidscalar_h5.read_direct( arr, source_sel=source_sel, dest_sel=dest_sel, @@ -581,7 +581,7 @@ def load(self, filename, only_params=False): # Communicate the slabs directly to the # domain decomposed fluid grids. domain_decompose( - slab_trimmed, + slab, component.fluidvars[index][multi_index].grid_mv, do_ghost_communication=True, )