Skip to content

Commit

Permalink
sionly changelog
Browse files Browse the repository at this point in the history
  • Loading branch information
ahbarnett committed Feb 8, 2025
1 parent 295cd10 commit 12d1760
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 77 deletions.
7 changes: 5 additions & 2 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
List of features / changes made / release notes, in reverse chronological order.
If not stated, FINUFFT is assumed (cuFINUFFT <=1.3 is listed separately).

Master, using release name V 2.4.0 (1/7/25)
Master, using release name V 2.4.0 (2/8/25)

* CPU opts.spreadinterponly (experts only), and GPU, logic and docs changed so
upsampfac controls kernel shape properly. Add C++/MATLAB demos. #602 (Barnett)
* PR617: Caching pip dependencies in github actions.
Forcing Ninja when building python on Windows.
* PR614: Added support for sccache in github actions.
Caching cmake dependencies so to avoid downloading fftw, xsimd, etc. every time.
Caching cmake dependencies so as to avoid downloading fftw, xsimd, etc. every time.
* fully removed chkbnds option (opts and spreadopts) (Barnett)
* classic GNU makefile settings make.inc.* tidied to make-platforms/ (Barnett)
* unified separate-dim arrays (eg X,Y,Z->XYZ), simplifying core (Reinecke #592)
Expand Down
7 changes: 0 additions & 7 deletions include/cufinufft/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,6 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran
printf("[cufinufft] upsampfac automatically set to %.3g\n", d_plan->opts.upsampfac);
}
}
if (d_plan->opts.gpu_spreadinterponly) {
// XNOR implementation below with boolean logic.
if ((d_plan->opts.upsampfac != 1.0) == (type != 3)) {
ier = FINUFFT_ERR_SPREADONLY_UPSAMP_INVALID;
goto finalize;
}
}
/* Setup Spreader */
if ((ier = setup_spreader_for_nufft(d_plan->spopts, tol, d_plan->opts)) > 1) {
// can return FINUFFT_WARN_EPS_TOO_SMALL=1, which is OK
Expand Down
45 changes: 21 additions & 24 deletions src/cuda/1d/cufinufft1d.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,23 +42,21 @@ int cufinufft1d1_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk,
d_cstart = d_c + i * d_plan->batchsize * d_plan->M;
d_fkstart = d_fk + i * d_plan->batchsize * d_plan->ms;
d_plan->c = d_cstart;
d_plan->fk = d_fkstart;
d_plan->fk = d_fkstart; // so deconvolve will write into user output f
if (d_plan->opts.gpu_spreadinterponly)
d_plan->fw = d_fkstart;
// this is needed
d_plan->fw = d_fkstart; // spread directly into user output f

// this is needed
if ((ier = checkCudaErrors(cudaMemsetAsync(
d_plan->fw, 0, d_plan->batchsize * d_plan->nf1 * sizeof(cuda_complex<T>),
stream))))
return ier;

// Step 1: Spread
if ((ier = cuspread1d<T>(d_plan, blksize))) return ier;
// if spreadonly, skip the rest

if (d_plan->opts.gpu_spreadinterponly)
continue;

if (d_plan->opts.gpu_spreadinterponly) continue; // skip steps 2 and 3

// Step 2: FFT
cufftResult cufft_status =
cufft_ex(d_plan->fftplan, d_plan->fw, d_plan->fw, d_plan->iflag);
Expand Down Expand Up @@ -103,24 +101,23 @@ int cufinufft1d2_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk,

d_plan->c = d_cstart;
d_plan->fk = d_fkstart;

// Skip steps 1 and 2 if interponly
if (!d_plan->opts.gpu_spreadinterponly) {
// Step 1: amplify Fourier coeffs fk and copy into upsampled array fw
if (d_plan->opts.modeord == 0) {
if ((ier = cudeconvolve1d<T, 0>(d_plan, blksize))) return ier;
} else {
if ((ier = cudeconvolve1d<T, 1>(d_plan, blksize))) return ier;
}

// Step 2: FFT
cufftResult cufft_status =
cufft_ex(d_plan->fftplan, d_plan->fw, d_plan->fw, d_plan->iflag);
if (cufft_status != CUFFT_SUCCESS) return FINUFFT_ERR_CUDA_FAILURE;
}
else
d_plan->fw = d_fkstart;

// Step 1: amplify Fourier coeffs fk and copy into upsampled array fw
if (d_plan->opts.modeord == 0) {
if ((ier = cudeconvolve1d<T, 0>(d_plan, blksize))) return ier;
} else {
if ((ier = cudeconvolve1d<T, 1>(d_plan, blksize))) return ier;
}

// Step 2: FFT
cufftResult cufft_status =
cufft_ex(d_plan->fftplan, d_plan->fw, d_plan->fw, d_plan->iflag);
if (cufft_status != CUFFT_SUCCESS) return FINUFFT_ERR_CUDA_FAILURE;
} else
d_plan->fw = d_fkstart; // interpolate directly from user input f

// Step 3: Interpolate
if ((ier = cuinterp1d<T>(d_plan, blksize))) return ier;
}
Expand Down
39 changes: 18 additions & 21 deletions src/cuda/2d/cufinufft2d.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ int cufinufft2d1_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk,
d_cstart = d_c + i * d_plan->batchsize * d_plan->M;
d_fkstart = d_fk + i * d_plan->batchsize * d_plan->ms * d_plan->mt;
d_plan->c = d_cstart;
d_plan->fk = d_fkstart;
d_plan->fk = d_fkstart; // so deconvolve will write into user output f
if (d_plan->opts.gpu_spreadinterponly)
d_plan->fw = d_fkstart;
d_plan->fw = d_fkstart; // spread directly into user output f

// this is needed
if ((ier = checkCudaErrors(cudaMemsetAsync(
Expand All @@ -57,10 +57,8 @@ int cufinufft2d1_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk,
// Step 1: Spread
if ((ier = cuspread2d<T>(d_plan, blksize))) return ier;

// if spreadonly, skip the rest
if (d_plan->opts.gpu_spreadinterponly)
continue;

if (d_plan->opts.gpu_spreadinterponly) continue; // skip steps 2 and 3

// Step 2: FFT
cufftResult cufft_status =
cufft_ex(d_plan->fftplan, d_plan->fw, d_plan->fw, d_plan->iflag);
Expand Down Expand Up @@ -108,21 +106,20 @@ int cufinufft2d2_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk,

// Skip steps 1 and 2 if interponly
if (!d_plan->opts.gpu_spreadinterponly) {
// Step 1: amplify Fourier coeffs fk and copy into upsampled array fw
if (d_plan->opts.modeord == 0) {
if ((ier = cudeconvolve2d<T, 0>(d_plan, blksize))) return ier;
} else {
if ((ier = cudeconvolve2d<T, 1>(d_plan, blksize))) return ier;
}

// Step 2: FFT
cufftResult cufft_status =
cufft_ex(d_plan->fftplan, d_plan->fw, d_plan->fw, d_plan->iflag);
if (cufft_status != CUFFT_SUCCESS) return FINUFFT_ERR_CUDA_FAILURE;
}
else
d_plan->fw = d_fkstart;

// Step 1: amplify Fourier coeffs fk and copy into upsampled array fw
if (d_plan->opts.modeord == 0) {
if ((ier = cudeconvolve2d<T, 0>(d_plan, blksize))) return ier;
} else {
if ((ier = cudeconvolve2d<T, 1>(d_plan, blksize))) return ier;
}

// Step 2: FFT
cufftResult cufft_status =
cufft_ex(d_plan->fftplan, d_plan->fw, d_plan->fw, d_plan->iflag);
if (cufft_status != CUFFT_SUCCESS) return FINUFFT_ERR_CUDA_FAILURE;
} else
d_plan->fw = d_fkstart; // interpolate directly from user input f

// Step 3: Interpolate
if ((ier = cuinterp2d<T>(d_plan, blksize))) return ier;
}
Expand Down
45 changes: 22 additions & 23 deletions src/cuda/3d/cufinufft3d.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ int cufinufft3d1_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk,
Melody Shih 07/25/19
*/
{
assert(d_plan->spopts.spread_direction == 1);
auto &stream = d_plan->stream;
int ier;
cuda_complex<T> *d_fkstart;
Expand All @@ -41,9 +42,9 @@ int cufinufft3d1_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk,
d_fkstart = d_fk + i * d_plan->batchsize * d_plan->ms * d_plan->mt * d_plan->mu;

d_plan->c = d_cstart;
d_plan->fk = d_fkstart;
d_plan->fk = d_fkstart; // so deconvolve will write into user output f
if (d_plan->opts.gpu_spreadinterponly)
d_plan->fw = d_fkstart;
d_plan->fw = d_fkstart; // spread directly into user output f

if ((ier = checkCudaErrors(cudaMemsetAsync(
d_plan->fw, 0, d_plan->batchsize * d_plan->nf * sizeof(cuda_complex<T>),
Expand All @@ -52,12 +53,10 @@ int cufinufft3d1_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk,

// Step 1: Spread
if ((ier = cuspread3d<T>(d_plan, blksize))) return ier;

// if spreadonly, skip the rest
if (d_plan->opts.gpu_spreadinterponly)
continue;

// Step 2: FFT

if (d_plan->opts.gpu_spreadinterponly) continue; // skip steps 2 and 3

// Step 2: FFT
cufftResult cufft_status =
cufft_ex(d_plan->fftplan, d_plan->fw, d_plan->fw, d_plan->iflag);
if (cufft_status != CUFFT_SUCCESS) return FINUFFT_ERR_CUDA_FAILURE;
Expand Down Expand Up @@ -89,6 +88,7 @@ int cufinufft3d2_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk,
Melody Shih 07/25/19
*/
{
assert(d_plan->spopts.spread_direction == 2);
int ier;
cuda_complex<T> *d_fkstart;
cuda_complex<T> *d_cstart;
Expand All @@ -102,21 +102,20 @@ int cufinufft3d2_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk,

// Skip steps 1 and 2 if interponly
if (!d_plan->opts.gpu_spreadinterponly) {
// Step 1: amplify Fourier coeffs fk and copy into upsampled array fw
if (d_plan->opts.modeord == 0) {
if ((ier = cudeconvolve3d<T, 0>(d_plan, blksize))) return ier;
} else {
if ((ier = cudeconvolve3d<T, 1>(d_plan, blksize))) return ier;
}
// Step 2: FFT
RETURN_IF_CUDA_ERROR
cufftResult cufft_status =
cufft_ex(d_plan->fftplan, d_plan->fw, d_plan->fw, d_plan->iflag);
if (cufft_status != CUFFT_SUCCESS) return FINUFFT_ERR_CUDA_FAILURE;
}
else
d_plan->fw = d_fkstart;

// Step 1: amplify Fourier coeffs fk and copy into upsampled array fw
if (d_plan->opts.modeord == 0) {
if ((ier = cudeconvolve3d<T, 0>(d_plan, blksize))) return ier;
} else {
if ((ier = cudeconvolve3d<T, 1>(d_plan, blksize))) return ier;
}
// Step 2: FFT
RETURN_IF_CUDA_ERROR
cufftResult cufft_status =
cufft_ex(d_plan->fftplan, d_plan->fw, d_plan->fw, d_plan->iflag);
if (cufft_status != CUFFT_SUCCESS) return FINUFFT_ERR_CUDA_FAILURE;
} else
d_plan->fw = d_fkstart; // interpolate directly from user input f

// Step 3: Interpolate
if ((ier = cuinterp3d<T>(d_plan, blksize))) return ier;
}
Expand Down

0 comments on commit 12d1760

Please sign in to comment.