From d4c1f14fc0efefb45970f72b3c81b4f3ea011f30 Mon Sep 17 00:00:00 2001 From: MikaelSlevinsky Date: Wed, 2 Feb 2022 12:58:16 -0600 Subject: [PATCH] allow fftw planner flags into the fftw.c api --- examples/additiontheorem.c | 2 +- examples/calculus.c | 2 +- examples/holomorphic.c | 4 +- examples/spinweighted.c | 4 +- src/fasttransforms.h | 40 +++++++------- src/fftw.c | 104 ++++++++++++++++++------------------- src/ftinternal.h | 3 -- src/ftutilities.h | 3 ++ test/test_fftw.c | 56 ++++++++++---------- 9 files changed, 109 insertions(+), 109 deletions(-) diff --git a/examples/additiontheorem.c b/examples/additiontheorem.c index 78338f69..c778d704 100644 --- a/examples/additiontheorem.c +++ b/examples/additiontheorem.c @@ -101,7 +101,7 @@ int main(void) { printf("\n"); ft_harmonic_plan * P = ft_plan_sph2fourier(N); - ft_sphere_fftw_plan * PA = ft_plan_sph_analysis(N, M); + ft_sphere_fftw_plan * PA = ft_plan_sph_analysis(N, M, FT_FFTW_FLAGS); ft_execute_sph_analysis('N', PA, F, N, M); ft_execute_fourier2sph('N', P, F, N, M); diff --git a/examples/calculus.c b/examples/calculus.c index ad2997d7..0aac9c5f 100644 --- a/examples/calculus.c +++ b/examples/calculus.c @@ -57,7 +57,7 @@ int main(void) { P = ft_plan_tri2cheb(N, alpha, beta, gamma); Px = ft_plan_tri2cheb(N, alpha+1.0, beta, gamma+1.0); Py = ft_plan_tri2cheb(N, alpha, beta+1.0, gamma+1.0); - PA = ft_plan_tri_analysis(N, M); + PA = ft_plan_tri_analysis(N, M, FT_FFTW_FLAGS); double u[N], x[N], v[M], w[M], F[N*M], Fx[N*M], Fy[N*M], Gx[N*M], Gy[N*M]; diff --git a/examples/holomorphic.c b/examples/holomorphic.c index 2b8bd598..88413daa 100644 --- a/examples/holomorphic.c +++ b/examples/holomorphic.c @@ -86,7 +86,7 @@ int main(void) { double alpha = 0.0, beta = 0.0; ft_harmonic_plan * P = ft_plan_disk2cxf(N, alpha, beta); - ft_disk_fftw_plan * PA = ft_plan_disk_analysis(N, M); + ft_disk_fftw_plan * PA = ft_plan_disk_analysis(N, M, FT_FFTW_FLAGS); ft_execute_disk_analysis('N', PA, F, N, M); ft_execute_cxf2disk('N', P, F, N, M); @@ -142,7 +142,7 @@ int main(void) { printf("\n"); P = ft_plan_rectdisk2cheb(N, beta); - ft_rectdisk_fftw_plan * QA = ft_plan_rectdisk_analysis(N, M); + ft_rectdisk_fftw_plan * QA = ft_plan_rectdisk_analysis(N, M, FT_FFTW_FLAGS); ft_execute_rectdisk_analysis('N', QA, F, N, M); ft_execute_cheb2rectdisk('N', P, F, N, M); diff --git a/examples/spinweighted.c b/examples/spinweighted.c index 05387a68..2965a903 100644 --- a/examples/spinweighted.c +++ b/examples/spinweighted.c @@ -68,7 +68,7 @@ int main(void) { printf("\n"); ft_spin_harmonic_plan * P = ft_plan_spinsph2fourier(N, 0); - ft_spinsphere_fftw_plan * PA = ft_plan_spinsph_analysis(N, M, 0); + ft_spinsphere_fftw_plan * PA = ft_plan_spinsph_analysis(N, M, 0, FT_FFTW_FLAGS); ft_execute_spinsph_analysis('N', PA, F, N, M); ft_execute_fourier2spinsph('N', P, F, N, M); @@ -106,7 +106,7 @@ int main(void) { ft_destroy_spinsphere_fftw_plan(PA); P = ft_plan_spinsph2fourier(N, 1); - PA = ft_plan_spinsph_analysis(N, M, 1); + PA = ft_plan_spinsph_analysis(N, M, 1, FT_FFTW_FLAGS); ft_execute_spinsph_analysis('N', PA, F, N, M); ft_execute_fourier2spinsph('N', P, F, N, M); diff --git a/src/fasttransforms.h b/src/fasttransforms.h index cf0d6e14..ce0ac507 100644 --- a/src/fasttransforms.h +++ b/src/fasttransforms.h @@ -423,13 +423,13 @@ typedef struct { /// Destroy a \ref ft_sphere_fftw_plan. void ft_destroy_sphere_fftw_plan(ft_sphere_fftw_plan * P); -ft_sphere_fftw_plan * ft_plan_sph_with_kind(const int N, const int M, const fftw_r2r_kind kind[3][1]); +ft_sphere_fftw_plan * ft_plan_sph_with_kind(const int N, const int M, const fftw_r2r_kind kind[3][1], const unsigned flags); /// Plan FFTW synthesis on the sphere. -ft_sphere_fftw_plan * ft_plan_sph_synthesis(const int N, const int M); +ft_sphere_fftw_plan * ft_plan_sph_synthesis(const int N, const int M, const unsigned flags); /// Plan FFTW analysis on the sphere. -ft_sphere_fftw_plan * ft_plan_sph_analysis(const int N, const int M); -ft_sphere_fftw_plan * ft_plan_sphv_synthesis(const int N, const int M); -ft_sphere_fftw_plan * ft_plan_sphv_analysis(const int N, const int M); +ft_sphere_fftw_plan * ft_plan_sph_analysis(const int N, const int M, const unsigned flags); +ft_sphere_fftw_plan * ft_plan_sphv_synthesis(const int N, const int M, const unsigned flags); +ft_sphere_fftw_plan * ft_plan_sphv_analysis(const int N, const int M, const unsigned flags); /// Execute FFTW synthesis on the sphere. void ft_execute_sph_synthesis(const char TRANS, const ft_sphere_fftw_plan * P, double * X, const int N, const int M); @@ -446,11 +446,11 @@ typedef struct { /// Destroy a \ref ft_triangle_fftw_plan. void ft_destroy_triangle_fftw_plan(ft_triangle_fftw_plan * P); -ft_triangle_fftw_plan * ft_plan_tri_with_kind(const int N, const int M, const fftw_r2r_kind kind0, const fftw_r2r_kind kind1); +ft_triangle_fftw_plan * ft_plan_tri_with_kind(const int N, const int M, const fftw_r2r_kind kind0, const fftw_r2r_kind kind1, const unsigned flags); /// Plan FFTW synthesis on the triangle. -ft_triangle_fftw_plan * ft_plan_tri_synthesis(const int N, const int M); +ft_triangle_fftw_plan * ft_plan_tri_synthesis(const int N, const int M, const unsigned flags); /// Plan FFTW analysis on the triangle. -ft_triangle_fftw_plan * ft_plan_tri_analysis(const int N, const int M); +ft_triangle_fftw_plan * ft_plan_tri_analysis(const int N, const int M, const unsigned flags); /// Execute FFTW synthesis on the triangle. void ft_execute_tri_synthesis(const char TRANS, const ft_triangle_fftw_plan * P, double * X, const int N, const int M); @@ -463,9 +463,9 @@ typedef struct { void ft_destroy_tetrahedron_fftw_plan(ft_tetrahedron_fftw_plan * P); -ft_tetrahedron_fftw_plan * ft_plan_tet_with_kind(const int N, const int L, const int M, const fftw_r2r_kind kind0, const fftw_r2r_kind kind1, const fftw_r2r_kind kind2); -ft_tetrahedron_fftw_plan * ft_plan_tet_synthesis(const int N, const int L, const int M); -ft_tetrahedron_fftw_plan * ft_plan_tet_analysis(const int N, const int L, const int M); +ft_tetrahedron_fftw_plan * ft_plan_tet_with_kind(const int N, const int L, const int M, const fftw_r2r_kind kind0, const fftw_r2r_kind kind1, const fftw_r2r_kind kind2, const unsigned flags); +ft_tetrahedron_fftw_plan * ft_plan_tet_synthesis(const int N, const int L, const int M, const unsigned flags); +ft_tetrahedron_fftw_plan * ft_plan_tet_analysis(const int N, const int L, const int M, const unsigned flags); void ft_execute_tet_synthesis(const char TRANS, const ft_tetrahedron_fftw_plan * P, double * X, const int N, const int L, const int M); void ft_execute_tet_analysis(const char TRANS, const ft_tetrahedron_fftw_plan * P, double * X, const int N, const int L, const int M); @@ -482,11 +482,11 @@ typedef struct { /// Destroy a \ref ft_disk_fftw_plan. void ft_destroy_disk_fftw_plan(ft_disk_fftw_plan * P); -ft_disk_fftw_plan * ft_plan_disk_with_kind(const int N, const int M, const fftw_r2r_kind kind[3][1]); +ft_disk_fftw_plan * ft_plan_disk_with_kind(const int N, const int M, const fftw_r2r_kind kind[3][1], const unsigned flags); /// Plan FFTW synthesis on the disk. -ft_disk_fftw_plan * ft_plan_disk_synthesis(const int N, const int M); +ft_disk_fftw_plan * ft_plan_disk_synthesis(const int N, const int M, const unsigned flags); /// Plan FFTW analysis on the disk. -ft_disk_fftw_plan * ft_plan_disk_analysis(const int N, const int M); +ft_disk_fftw_plan * ft_plan_disk_analysis(const int N, const int M, const unsigned flags); /// Execute FFTW synthesis on the disk. void ft_execute_disk_synthesis(const char TRANS, const ft_disk_fftw_plan * P, double * X, const int N, const int M); @@ -502,11 +502,11 @@ typedef struct { /// Destroy a \ref ft_rectdisk_fftw_plan. void ft_destroy_rectdisk_fftw_plan(ft_rectdisk_fftw_plan * P); -ft_rectdisk_fftw_plan * ft_plan_rectdisk_with_kind(const int N, const int M, const fftw_r2r_kind kind[3][1]); +ft_rectdisk_fftw_plan * ft_plan_rectdisk_with_kind(const int N, const int M, const fftw_r2r_kind kind[3][1], const unsigned flags); /// Plan FFTW synthesis on the rectangularized disk. -ft_rectdisk_fftw_plan * ft_plan_rectdisk_synthesis(const int N, const int M); +ft_rectdisk_fftw_plan * ft_plan_rectdisk_synthesis(const int N, const int M, const unsigned flags); /// Plan FFTW analysis on the rectangularized disk. -ft_rectdisk_fftw_plan * ft_plan_rectdisk_analysis(const int N, const int M); +ft_rectdisk_fftw_plan * ft_plan_rectdisk_analysis(const int N, const int M, const unsigned flags); /// Execute FFTW synthesis on the rectangularized disk. void ft_execute_rectdisk_synthesis(const char TRANS, const ft_rectdisk_fftw_plan * P, double * X, const int N, const int M); @@ -528,11 +528,11 @@ void ft_destroy_spinsphere_fftw_plan(ft_spinsphere_fftw_plan * P); int ft_get_spin_spinsphere_fftw_plan(const ft_spinsphere_fftw_plan * P); -ft_spinsphere_fftw_plan * ft_plan_spinsph_with_kind(const int N, const int M, const int S, const fftw_r2r_kind kind[2][1], const int sign); +ft_spinsphere_fftw_plan * ft_plan_spinsph_with_kind(const int N, const int M, const int S, const fftw_r2r_kind kind[2][1], const int sign, const unsigned flags); /// Plan FFTW synthesis on the sphere with spin. -ft_spinsphere_fftw_plan * ft_plan_spinsph_synthesis(const int N, const int M, const int S); +ft_spinsphere_fftw_plan * ft_plan_spinsph_synthesis(const int N, const int M, const int S, const unsigned flags); /// Plan FFTW analysis on the sphere with spin. -ft_spinsphere_fftw_plan * ft_plan_spinsph_analysis(const int N, const int M, const int S); +ft_spinsphere_fftw_plan * ft_plan_spinsph_analysis(const int N, const int M, const int S, const unsigned flags); /// Execute FFTW synthesis on the sphere with spin. void ft_execute_spinsph_synthesis(const char TRANS, const ft_spinsphere_fftw_plan * P, ft_complex * X, const int N, const int M); diff --git a/src/fftw.c b/src/fftw.c index c43179dd..88fefc18 100644 --- a/src/fftw.c +++ b/src/fftw.c @@ -72,7 +72,7 @@ void ft_destroy_sphere_fftw_plan(ft_sphere_fftw_plan * P) { free(P); } -ft_sphere_fftw_plan * ft_plan_sph_with_kind(const int N, const int M, const fftw_r2r_kind kind[3][1]) { +ft_sphere_fftw_plan * ft_plan_sph_with_kind(const int N, const int M, const fftw_r2r_kind kind[3][1], const unsigned flags) { int rank = 1; // not 2: we are computing 1d transforms // int n[] = {N}; // 1d transforms of length n // int idist = 4*N, odist = 4*N; @@ -84,16 +84,16 @@ ft_sphere_fftw_plan * ft_plan_sph_with_kind(const int N, const int M, const fftw P->Y = fftw_malloc(N*2*(M/2+1)*sizeof(double)); int howmany = (M+3)/4; - P->plantheta1 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[0], FT_FFTW_FLAGS); + P->plantheta1 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[0], flags); howmany = (M+2)/4; - P->plantheta2 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[1], FT_FFTW_FLAGS); + P->plantheta2 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[1], flags); howmany = (M+1)/4; - P->plantheta3 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[1], FT_FFTW_FLAGS); + P->plantheta3 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[1], flags); howmany = M/4; - P->plantheta4 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[0], FT_FFTW_FLAGS); + P->plantheta4 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[0], flags); n[0] = M; idist = odist = 1; @@ -101,31 +101,31 @@ ft_sphere_fftw_plan * ft_plan_sph_with_kind(const int N, const int M, const fftw howmany = N; double * Z = fftw_malloc(N*M*sizeof(double)); if (kind[2][0] == FFTW_HC2R) - P->planphi = fftw_plan_many_dft_c2r(rank, n, howmany, (fftw_complex *) P->Y, inembed, istride, idist, Z, onembed, ostride, odist, FT_FFTW_FLAGS); + P->planphi = fftw_plan_many_dft_c2r(rank, n, howmany, (fftw_complex *) P->Y, inembed, istride, idist, Z, onembed, ostride, odist, flags); else if (kind[2][0] == FFTW_R2HC) - P->planphi = fftw_plan_many_dft_r2c(rank, n, howmany, Z, inembed, istride, idist, (fftw_complex *) P->Y, onembed, ostride, odist, FT_FFTW_FLAGS); + P->planphi = fftw_plan_many_dft_r2c(rank, n, howmany, Z, inembed, istride, idist, (fftw_complex *) P->Y, onembed, ostride, odist, flags); fftw_free(Z); return P; } -ft_sphere_fftw_plan * ft_plan_sph_synthesis(const int N, const int M) { +ft_sphere_fftw_plan * ft_plan_sph_synthesis(const int N, const int M, const unsigned flags) { const fftw_r2r_kind kind[3][1] = {{FFTW_REDFT01}, {FFTW_RODFT01}, {FFTW_HC2R}}; - return ft_plan_sph_with_kind(N, M, kind); + return ft_plan_sph_with_kind(N, M, kind, flags); } -ft_sphere_fftw_plan * ft_plan_sph_analysis(const int N, const int M) { +ft_sphere_fftw_plan * ft_plan_sph_analysis(const int N, const int M, const unsigned flags) { const fftw_r2r_kind kind[3][1] = {{FFTW_REDFT10}, {FFTW_RODFT10}, {FFTW_R2HC}}; - return ft_plan_sph_with_kind(N, M, kind); + return ft_plan_sph_with_kind(N, M, kind, flags); } -ft_sphere_fftw_plan * ft_plan_sphv_synthesis(const int N, const int M) { +ft_sphere_fftw_plan * ft_plan_sphv_synthesis(const int N, const int M, const unsigned flags) { const fftw_r2r_kind kind[3][1] = {{FFTW_RODFT01}, {FFTW_REDFT01}, {FFTW_HC2R}}; - return ft_plan_sph_with_kind(N, M, kind); + return ft_plan_sph_with_kind(N, M, kind, flags); } -ft_sphere_fftw_plan * ft_plan_sphv_analysis(const int N, const int M) { +ft_sphere_fftw_plan * ft_plan_sphv_analysis(const int N, const int M, const unsigned flags) { const fftw_r2r_kind kind[3][1] = {{FFTW_RODFT10}, {FFTW_REDFT10}, {FFTW_R2HC}}; - return ft_plan_sph_with_kind(N, M, kind); + return ft_plan_sph_with_kind(N, M, kind, flags); } void ft_execute_sph_synthesis(const char TRANS, const ft_sphere_fftw_plan * P, double * X, const int N, const int M) { @@ -278,16 +278,16 @@ void ft_destroy_triangle_fftw_plan(ft_triangle_fftw_plan * P) { free(P); } -ft_triangle_fftw_plan * ft_plan_tri_with_kind(const int N, const int M, const fftw_r2r_kind kind0, const fftw_r2r_kind kind1) { +ft_triangle_fftw_plan * ft_plan_tri_with_kind(const int N, const int M, const fftw_r2r_kind kind0, const fftw_r2r_kind kind1, const unsigned flags) { ft_triangle_fftw_plan * P = malloc(sizeof(ft_triangle_fftw_plan)); double * X = fftw_malloc(N*M*sizeof(double)); - P->planxy = fftw_plan_r2r_2d(N, M, X, X, kind0, kind1, FT_FFTW_FLAGS); + P->planxy = fftw_plan_r2r_2d(N, M, X, X, kind0, kind1, flags); fftw_free(X); return P; } -ft_triangle_fftw_plan * ft_plan_tri_synthesis(const int N, const int M) {return ft_plan_tri_with_kind(N, M, FFTW_REDFT01, FFTW_REDFT01);} -ft_triangle_fftw_plan * ft_plan_tri_analysis(const int N, const int M) {return ft_plan_tri_with_kind(N, M, FFTW_REDFT10, FFTW_REDFT10);} +ft_triangle_fftw_plan * ft_plan_tri_synthesis(const int N, const int M, const unsigned flags) {return ft_plan_tri_with_kind(N, M, FFTW_REDFT01, FFTW_REDFT01, flags);} +ft_triangle_fftw_plan * ft_plan_tri_analysis(const int N, const int M, const unsigned flags) {return ft_plan_tri_with_kind(N, M, FFTW_REDFT10, FFTW_REDFT10, flags);} void ft_execute_tri_synthesis(const char TRANS, const ft_triangle_fftw_plan * P, double * X, const int N, const int M) { if (TRANS == 'N') { @@ -337,16 +337,16 @@ void ft_destroy_tetrahedron_fftw_plan(ft_tetrahedron_fftw_plan * P) { free(P); } -ft_tetrahedron_fftw_plan * ft_plan_tet_with_kind(const int N, const int L, const int M, const fftw_r2r_kind kind0, const fftw_r2r_kind kind1, const fftw_r2r_kind kind2) { +ft_tetrahedron_fftw_plan * ft_plan_tet_with_kind(const int N, const int L, const int M, const fftw_r2r_kind kind0, const fftw_r2r_kind kind1, const fftw_r2r_kind kind2, const unsigned flags) { ft_tetrahedron_fftw_plan * P = malloc(sizeof(ft_tetrahedron_fftw_plan)); double * X = fftw_malloc(N*L*M*sizeof(double)); - P->planxyz = fftw_plan_r2r_3d(N, L, M, X, X, kind0, kind1, kind2, FT_FFTW_FLAGS); + P->planxyz = fftw_plan_r2r_3d(N, L, M, X, X, kind0, kind1, kind2, flags); fftw_free(X); return P; } -ft_tetrahedron_fftw_plan * ft_plan_tet_synthesis(const int N, const int L, const int M) {return ft_plan_tet_with_kind(N, L, M, FFTW_REDFT01, FFTW_REDFT01, FFTW_REDFT01);} -ft_tetrahedron_fftw_plan * ft_plan_tet_analysis(const int N, const int L, const int M) {return ft_plan_tet_with_kind(N, L, M, FFTW_REDFT10, FFTW_REDFT10, FFTW_REDFT10);} +ft_tetrahedron_fftw_plan * ft_plan_tet_synthesis(const int N, const int L, const int M, const unsigned flags) {return ft_plan_tet_with_kind(N, L, M, FFTW_REDFT01, FFTW_REDFT01, FFTW_REDFT01, flags);} +ft_tetrahedron_fftw_plan * ft_plan_tet_analysis(const int N, const int L, const int M, const unsigned flags) {return ft_plan_tet_with_kind(N, L, M, FFTW_REDFT10, FFTW_REDFT10, FFTW_REDFT10, flags);} void ft_execute_tet_synthesis(const char TRANS, const ft_tetrahedron_fftw_plan * P, double * X, const int N, const int L, const int M) { if (TRANS == 'N') { @@ -411,7 +411,7 @@ void ft_destroy_disk_fftw_plan(ft_disk_fftw_plan * P) { free(P); } -ft_disk_fftw_plan * ft_plan_disk_with_kind(const int N, const int M, const fftw_r2r_kind kind[3][1]) { +ft_disk_fftw_plan * ft_plan_disk_with_kind(const int N, const int M, const fftw_r2r_kind kind[3][1], const unsigned flags) { int rank = 1; // not 2: we are computing 1d transforms // int n[] = {N}; // 1d transforms of length n // int idist = 4*N, odist = 4*N; @@ -423,16 +423,16 @@ ft_disk_fftw_plan * ft_plan_disk_with_kind(const int N, const int M, const fftw_ P->Y = fftw_malloc(N*2*(M/2+1)*sizeof(double)); int howmany = (M+3)/4; - P->planr1 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[0], FT_FFTW_FLAGS); + P->planr1 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[0], flags); howmany = (M+2)/4; - P->planr2 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[1], FT_FFTW_FLAGS); + P->planr2 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[1], flags); howmany = (M+1)/4; - P->planr3 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[1], FT_FFTW_FLAGS); + P->planr3 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[1], flags); howmany = M/4; - P->planr4 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[0], FT_FFTW_FLAGS); + P->planr4 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[0], flags); n[0] = M; idist = odist = 1; @@ -440,21 +440,21 @@ ft_disk_fftw_plan * ft_plan_disk_with_kind(const int N, const int M, const fftw_ howmany = N; double * Z = fftw_malloc(N*M*sizeof(double)); if (kind[2][0] == FFTW_HC2R) - P->plantheta = fftw_plan_many_dft_c2r(rank, n, howmany, (fftw_complex *) P->Y, inembed, istride, idist, Z, onembed, ostride, odist, FT_FFTW_FLAGS); + P->plantheta = fftw_plan_many_dft_c2r(rank, n, howmany, (fftw_complex *) P->Y, inembed, istride, idist, Z, onembed, ostride, odist, flags); else if (kind[2][0] == FFTW_R2HC) - P->plantheta = fftw_plan_many_dft_r2c(rank, n, howmany, Z, inembed, istride, idist, (fftw_complex *) P->Y, onembed, ostride, odist, FT_FFTW_FLAGS); + P->plantheta = fftw_plan_many_dft_r2c(rank, n, howmany, Z, inembed, istride, idist, (fftw_complex *) P->Y, onembed, ostride, odist, flags); fftw_free(Z); return P; } -ft_disk_fftw_plan * ft_plan_disk_synthesis(const int N, const int M) { +ft_disk_fftw_plan * ft_plan_disk_synthesis(const int N, const int M, const unsigned flags) { const fftw_r2r_kind kind[3][1] = {{FFTW_REDFT01}, {FFTW_REDFT11}, {FFTW_HC2R}}; - return ft_plan_disk_with_kind(N, M, kind); + return ft_plan_disk_with_kind(N, M, kind, flags); } -ft_disk_fftw_plan * ft_plan_disk_analysis(const int N, const int M) { +ft_disk_fftw_plan * ft_plan_disk_analysis(const int N, const int M, const unsigned flags) { const fftw_r2r_kind kind[3][1] = {{FFTW_REDFT10}, {FFTW_REDFT11}, {FFTW_R2HC}}; - return ft_plan_disk_with_kind(N, M, kind); + return ft_plan_disk_with_kind(N, M, kind, flags); } void ft_execute_disk_synthesis(const char TRANS, const ft_disk_fftw_plan * P, double * X, const int N, const int M) { @@ -528,7 +528,7 @@ void ft_destroy_rectdisk_fftw_plan(ft_rectdisk_fftw_plan * P) { free(P); } -ft_rectdisk_fftw_plan * ft_plan_rectdisk_with_kind(const int N, const int M, const fftw_r2r_kind kind[3][1]) { +ft_rectdisk_fftw_plan * ft_plan_rectdisk_with_kind(const int N, const int M, const fftw_r2r_kind kind[3][1], const unsigned flags) { int rank = 1; // not 2: we are computing 1d transforms // int n[] = {N}; // 1d transforms of length n // int idist = 2*N, odist = 2*N; @@ -540,29 +540,29 @@ ft_rectdisk_fftw_plan * ft_plan_rectdisk_with_kind(const int N, const int M, con double * X = fftw_malloc(N*M*sizeof(double)); int howmany = (M+1)/2; - P->planx1 = fftw_plan_many_r2r(rank, n, howmany, X, inembed, istride, idist, X, onembed, ostride, odist, kind[0], FT_FFTW_FLAGS); + P->planx1 = fftw_plan_many_r2r(rank, n, howmany, X, inembed, istride, idist, X, onembed, ostride, odist, kind[0], flags); howmany = M/2; - P->planx2 = fftw_plan_many_r2r(rank, n, howmany, X, inembed, istride, idist, X, onembed, ostride, odist, kind[1], FT_FFTW_FLAGS); + P->planx2 = fftw_plan_many_r2r(rank, n, howmany, X, inembed, istride, idist, X, onembed, ostride, odist, kind[1], flags); n[0] = M; idist = odist = 1; istride = ostride = N; howmany = N; - P->plany = fftw_plan_many_r2r(rank, n, howmany, X, inembed, istride, idist, X, onembed, ostride, odist, kind[2], FT_FFTW_FLAGS); + P->plany = fftw_plan_many_r2r(rank, n, howmany, X, inembed, istride, idist, X, onembed, ostride, odist, kind[2], flags); fftw_free(X); return P; } -ft_rectdisk_fftw_plan * ft_plan_rectdisk_synthesis(const int N, const int M) { +ft_rectdisk_fftw_plan * ft_plan_rectdisk_synthesis(const int N, const int M, const unsigned flags) { const fftw_r2r_kind kind[3][1] = {{FFTW_REDFT01}, {FFTW_RODFT01}, {FFTW_REDFT01}}; - return ft_plan_rectdisk_with_kind(N, M, kind); + return ft_plan_rectdisk_with_kind(N, M, kind, flags); } -ft_rectdisk_fftw_plan * ft_plan_rectdisk_analysis(const int N, const int M) { +ft_rectdisk_fftw_plan * ft_plan_rectdisk_analysis(const int N, const int M, const unsigned flags) { const fftw_r2r_kind kind[3][1] = {{FFTW_REDFT10}, {FFTW_RODFT10}, {FFTW_REDFT10}}; - return ft_plan_rectdisk_with_kind(N, M, kind); + return ft_plan_rectdisk_with_kind(N, M, kind, flags); } void ft_execute_rectdisk_synthesis(const char TRANS, const ft_rectdisk_fftw_plan * P, double * X, const int N, const int M) { @@ -623,7 +623,7 @@ void ft_destroy_spinsphere_fftw_plan(ft_spinsphere_fftw_plan * P) { int ft_get_spin_spinsphere_fftw_plan(const ft_spinsphere_fftw_plan * P) {return P->S;} -ft_spinsphere_fftw_plan * ft_plan_spinsph_with_kind(const int N, const int M, const int S, const fftw_r2r_kind kind[2][1], const int sign) { +ft_spinsphere_fftw_plan * ft_plan_spinsph_with_kind(const int N, const int M, const int S, const fftw_r2r_kind kind[2][1], const int sign, const unsigned flags) { int rank = 1; // not 2: we are computing 1d transforms // int n[] = {N}; // 1d transforms of length n // int idist = 8*N, odist = 8*N; @@ -635,38 +635,38 @@ ft_spinsphere_fftw_plan * ft_plan_spinsph_with_kind(const int N, const int M, co P->Y = fftw_malloc(2*N*M*sizeof(double)); int howmany = (M+3)/4; - P->plantheta1 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[0], FT_FFTW_FLAGS); + P->plantheta1 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[0], flags); howmany = (M+2)/4; - P->plantheta2 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[1], FT_FFTW_FLAGS); + P->plantheta2 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[1], flags); howmany = (M+1)/4; - P->plantheta3 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[1], FT_FFTW_FLAGS); + P->plantheta3 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[1], flags); howmany = M/4; - P->plantheta4 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[0], FT_FFTW_FLAGS); + P->plantheta4 = fftw_plan_many_r2r(rank, n, howmany, P->Y, inembed, istride, idist, P->Y, onembed, ostride, odist, kind[0], flags); n[0] = M; idist = odist = 1; istride = ostride = N; howmany = N; fftw_complex * Z = fftw_malloc(N*M*sizeof(fftw_complex)); - P->planphi = fftw_plan_many_dft(rank, n, howmany, (fftw_complex *) P->Y, inembed, istride, idist, Z, onembed, ostride, odist, sign, FT_FFTW_FLAGS); + P->planphi = fftw_plan_many_dft(rank, n, howmany, (fftw_complex *) P->Y, inembed, istride, idist, Z, onembed, ostride, odist, sign, flags); fftw_free(Z); P->S = S; return P; } -ft_spinsphere_fftw_plan * ft_plan_spinsph_synthesis(const int N, const int M, const int S) { +ft_spinsphere_fftw_plan * ft_plan_spinsph_synthesis(const int N, const int M, const int S, const unsigned flags) { const fftw_r2r_kind evenkind[2][1] = {{FFTW_REDFT01}, {FFTW_RODFT01}}; const fftw_r2r_kind oddkind[2][1] = {{FFTW_RODFT01}, {FFTW_REDFT01}}; - return ft_plan_spinsph_with_kind(N, M, S, S%2 == 0 ? evenkind : oddkind, FFTW_BACKWARD); + return ft_plan_spinsph_with_kind(N, M, S, S%2 == 0 ? evenkind : oddkind, FFTW_BACKWARD, flags); } -ft_spinsphere_fftw_plan * ft_plan_spinsph_analysis(const int N, const int M, const int S) { +ft_spinsphere_fftw_plan * ft_plan_spinsph_analysis(const int N, const int M, const int S, const unsigned flags) { const fftw_r2r_kind evenkind[2][1] = {{FFTW_REDFT10}, {FFTW_RODFT10}}; const fftw_r2r_kind oddkind[2][1] = {{FFTW_RODFT10}, {FFTW_REDFT10}}; - return ft_plan_spinsph_with_kind(N, M, S, S%2 == 0 ? evenkind : oddkind, FFTW_FORWARD); + return ft_plan_spinsph_with_kind(N, M, S, S%2 == 0 ? evenkind : oddkind, FFTW_FORWARD, flags); } void ft_execute_spinsph_synthesis(const char TRANS, const ft_spinsphere_fftw_plan * P, ft_complex * X, const int N, const int M) { diff --git a/src/ftinternal.h b/src/ftinternal.h index 120f4e3b..5f377ff9 100644 --- a/src/ftinternal.h +++ b/src/ftinternal.h @@ -542,7 +542,4 @@ void swap_warp_NEONf(float * A, float * B, const int N); void warp(double * A, const int N, const int M, const int L); void warp_t(double * A, const int N, const int M, const int L); -// A bitwise OR ('|') of zero or more of the following: FFTW_ESTIMATE FFTW_MEASURE FFTW_PATIENT FFTW_EXHAUSTIVE FFTW_WISDOM_ONLY FFTW_DESTROY_INPUT FFTW_PRESERVE_INPUT FFTW_UNALIGNED -#define FT_FFTW_FLAGS FFTW_MEASURE | FFTW_DESTROY_INPUT - #endif // FTINTERNAL_H diff --git a/src/ftutilities.h b/src/ftutilities.h index cb4f6990..ad8f2bac 100644 --- a/src/ftutilities.h +++ b/src/ftutilities.h @@ -20,6 +20,9 @@ for (int ntimes = 0; ntimes < NTIMES; ntimes++) \ BLOCK; \ gettimeofday(&END, NULL); +// A bitwise OR ('|') of zero or more of the following: FFTW_ESTIMATE FFTW_MEASURE FFTW_PATIENT FFTW_EXHAUSTIVE FFTW_WISDOM_ONLY FFTW_DESTROY_INPUT FFTW_PRESERVE_INPUT FFTW_UNALIGNED +#define FT_FFTW_FLAGS FFTW_MEASURE | FFTW_DESTROY_INPUT + void printmat(char * MAT, char * FMT, double * A, int n, int m); void print_summary_size(size_t i); double * copymat(double * A, int n, int m); diff --git a/test/test_fftw.c b/test/test_fftw.c index 646d4bc5..4d6adf87 100644 --- a/test/test_fftw.c +++ b/test/test_fftw.c @@ -46,8 +46,8 @@ int main(int argc, const char * argv[]) { A = sphrand(N, M); B = copymat(A, N, M); P = ft_plan_sph2fourier(N); - PS = ft_plan_sph_synthesis(N, M); - PA = ft_plan_sph_analysis(N, M); + PS = ft_plan_sph_synthesis(N, M, FT_FFTW_FLAGS); + PA = ft_plan_sph_analysis(N, M, FT_FFTW_FLAGS); ft_execute_sph2fourier('N', P, A, N, M); ft_execute_sph_synthesis('N', PS, A, N, M); @@ -82,8 +82,8 @@ int main(int argc, const char * argv[]) { A = sphrand(N, M); P = ft_plan_sph2fourier(N); - PS = ft_plan_sph_synthesis(N, M); - PA = ft_plan_sph_analysis(N, M); + PS = ft_plan_sph_synthesis(N, M, FT_FFTW_FLAGS); + PA = ft_plan_sph_analysis(N, M, FT_FFTW_FLAGS); FT_TIME({ft_execute_sph2fourier('N', P, A, N, M); ft_execute_sph_synthesis('N', PS, A, N, M);}, start, end, NTIMES) printf("%d %.6f", N, elapsed(&start, &end, NTIMES)); @@ -108,8 +108,8 @@ int main(int argc, const char * argv[]) { A = sphrand(N, M); B = copymat(A, N, M); P = ft_plan_sph2fourier(N); - PS = ft_plan_sphv_synthesis(N, M); - PA = ft_plan_sphv_analysis(N, M); + PS = ft_plan_sphv_synthesis(N, M, FT_FFTW_FLAGS); + PA = ft_plan_sphv_analysis(N, M, FT_FFTW_FLAGS); ft_execute_sphv2fourier('N', P, A, N, M); ft_execute_sphv_synthesis('N', PS, A, N, M); @@ -144,8 +144,8 @@ int main(int argc, const char * argv[]) { A = sphrand(N, M); P = ft_plan_sph2fourier(N); - PS = ft_plan_sphv_synthesis(N, M); - PA = ft_plan_sphv_analysis(N, M); + PS = ft_plan_sphv_synthesis(N, M, FT_FFTW_FLAGS); + PA = ft_plan_sphv_analysis(N, M, FT_FFTW_FLAGS); FT_TIME({ft_execute_sphv2fourier('N', P, A, N, M); ft_execute_sphv_synthesis('N', PS, A, N, M);}, start, end, NTIMES) printf("%d %.6f", N, elapsed(&start, &end, NTIMES)); @@ -170,8 +170,8 @@ int main(int argc, const char * argv[]) { A = trirand(N, M); B = copymat(A, N, M); P = ft_plan_tri2cheb(N, alpha, beta, gamma); - QS = ft_plan_tri_synthesis(N, M); - QA = ft_plan_tri_analysis(N, M); + QS = ft_plan_tri_synthesis(N, M, FT_FFTW_FLAGS); + QA = ft_plan_tri_analysis(N, M, FT_FFTW_FLAGS); ft_execute_tri2cheb('N', P, A, N, M); ft_execute_tri_synthesis('N', QS, A, N, M); @@ -206,8 +206,8 @@ int main(int argc, const char * argv[]) { A = trirand(N, M); P = ft_plan_tri2cheb(N, alpha, beta, gamma); - QS = ft_plan_tri_synthesis(N, M); - QA = ft_plan_tri_analysis(N, M); + QS = ft_plan_tri_synthesis(N, M, FT_FFTW_FLAGS); + QA = ft_plan_tri_analysis(N, M, FT_FFTW_FLAGS); FT_TIME({ft_execute_tri2cheb('N', P, A, N, M); ft_execute_tri_synthesis('N', QS, A, N, M);}, start, end, NTIMES) printf("%d %.6f", N, elapsed(&start, &end, NTIMES)); @@ -232,8 +232,8 @@ int main(int argc, const char * argv[]) { A = diskrand(N, M); B = copymat(A, N, M); P = ft_plan_disk2cxf(N, alpha, beta); - RS = ft_plan_disk_synthesis(N, M); - RA = ft_plan_disk_analysis(N, M); + RS = ft_plan_disk_synthesis(N, M, FT_FFTW_FLAGS); + RA = ft_plan_disk_analysis(N, M, FT_FFTW_FLAGS); ft_execute_disk2cxf('N', P, A, N, M); ft_execute_disk_synthesis('N', RS, A, N, M); @@ -268,8 +268,8 @@ int main(int argc, const char * argv[]) { A = diskrand(N, M); P = ft_plan_disk2cxf(N, alpha, beta); - RS = ft_plan_disk_synthesis(N, M); - RA = ft_plan_disk_analysis(N, M); + RS = ft_plan_disk_synthesis(N, M, FT_FFTW_FLAGS); + RA = ft_plan_disk_analysis(N, M, FT_FFTW_FLAGS); FT_TIME({ft_execute_disk2cxf('N', P, A, N, M); ft_execute_disk_synthesis('N', RS, A, N, M);}, start, end, NTIMES) printf("%d %.6f", N, elapsed(&start, &end, NTIMES)); @@ -294,8 +294,8 @@ int main(int argc, const char * argv[]) { A = rectdiskrand(N, M); B = copymat(A, N, M); P = ft_plan_rectdisk2cheb(N, beta); - SS = ft_plan_rectdisk_synthesis(N, M); - SA = ft_plan_rectdisk_analysis(N, M); + SS = ft_plan_rectdisk_synthesis(N, M, FT_FFTW_FLAGS); + SA = ft_plan_rectdisk_analysis(N, M, FT_FFTW_FLAGS); ft_execute_rectdisk2cheb('N', P, A, N, M); ft_execute_rectdisk_synthesis('N', SS, A, N, M); @@ -330,8 +330,8 @@ int main(int argc, const char * argv[]) { A = rectdiskrand(N, M); P = ft_plan_rectdisk2cheb(N, beta); - SS = ft_plan_rectdisk_synthesis(N, M); - SA = ft_plan_rectdisk_analysis(N, M); + SS = ft_plan_rectdisk_synthesis(N, M, FT_FFTW_FLAGS); + SA = ft_plan_rectdisk_analysis(N, M, FT_FFTW_FLAGS); FT_TIME({ft_execute_rectdisk2cheb('N', P, A, N, M); ft_execute_rectdisk_synthesis('N', SS, A, N, M);}, start, end, NTIMES) printf("%d %.6f", N, elapsed(&start, &end, NTIMES)); @@ -356,8 +356,8 @@ int main(int argc, const char * argv[]) { A = tetrand(N, L, M); B = copymat(A, N, L*M); P = ft_plan_tet2cheb(N, alpha, beta, gamma, delta); - TS = ft_plan_tet_synthesis(N, L, M); - TA = ft_plan_tet_analysis(N, L, M); + TS = ft_plan_tet_synthesis(N, L, M, FT_FFTW_FLAGS); + TA = ft_plan_tet_analysis(N, L, M, FT_FFTW_FLAGS); ft_execute_tet2cheb('N', P, A, N, L, M); ft_execute_tet_synthesis('N', TS, A, N, L, M); @@ -399,8 +399,8 @@ int main(int argc, const char * argv[]) { A = tetrand(N, L, M); P = ft_plan_tet2cheb(N, alpha, beta, gamma, delta); - TS = ft_plan_tet_synthesis(N, L, M); - TA = ft_plan_tet_analysis(N, L, M); + TS = ft_plan_tet_synthesis(N, L, M, FT_FFTW_FLAGS); + TA = ft_plan_tet_analysis(N, L, M, FT_FFTW_FLAGS); FT_TIME({ft_execute_tet2cheb('N', P, A, N, L, M); ft_execute_tet_synthesis('N', TS, A, N, L, M);}, start, end, NTIMES) printf("%d %.6f", N, elapsed(&start, &end, NTIMES)); @@ -429,8 +429,8 @@ int main(int argc, const char * argv[]) { B = (double *) BC; SP = ft_plan_spinsph2fourier(N, S); - US = ft_plan_spinsph_synthesis(N, M, S); - UA = ft_plan_spinsph_analysis(N, M, S); + US = ft_plan_spinsph_synthesis(N, M, S, FT_FFTW_FLAGS); + UA = ft_plan_spinsph_analysis(N, M, S, FT_FFTW_FLAGS); ft_execute_spinsph2fourier('N', SP, AC, N, M); ft_execute_spinsph_synthesis('N', US, AC, N, M); @@ -467,8 +467,8 @@ int main(int argc, const char * argv[]) { for (int S = -2; S <= 2; S++) { ft_complex * AC = spinsphrand(N, M, S); SP = ft_plan_spinsph2fourier(N, S); - US = ft_plan_spinsph_synthesis(N, M, S); - UA = ft_plan_spinsph_analysis(N, M, S); + US = ft_plan_spinsph_synthesis(N, M, S, FT_FFTW_FLAGS); + UA = ft_plan_spinsph_analysis(N, M, S, FT_FFTW_FLAGS); FT_TIME({ft_execute_spinsph2fourier('N', SP, AC, N, M); ft_execute_spinsph_synthesis('N', US, AC, N, M);}, start, end, NTIMES) printf("%d %.6f", N, elapsed(&start, &end, NTIMES));