Skip to content

Commit

Permalink
add char TRANS to tet2cheb
Browse files Browse the repository at this point in the history
  • Loading branch information
MikaelSlevinsky committed Feb 10, 2021
1 parent 4a30369 commit 081adf2
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 38 deletions.
88 changes: 60 additions & 28 deletions src/drivers.c
Original file line number Diff line number Diff line change
Expand Up @@ -1080,34 +1080,66 @@ ft_harmonic_plan * ft_plan_tet2cheb(const int n, const double alpha, const doubl
return P;
}

void ft_execute_tet2cheb(const ft_harmonic_plan * P, double * A, const int N, const int L, const int M) {
execute_tet_hi2lo_AVX(P->RP[0], P->RP[1], A, P->B, L, M);
if ((P->beta + P->gamma + P->delta != -2.5) || (P->alpha != -0.5))
for (int m = 0; m < M; m++)
cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, L, 1.0, P->P[0], N, A+N*L*m, N);
if ((P->gamma + P->delta != -1.5) || (P->beta != -0.5))
for (int m = 0; m < M; m++)
cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, N, L, 1.0, P->P[1], N, A+N*L*m, N);
if ((P->delta != -0.5) || (P->gamma != -0.5))
for (int n = 0; n < N; n++)
for (int l = 0; l < L; l++)
cblas_dtrmv(CblasColMajor, CblasUpper, CblasTrans, CblasNonUnit, N, P->P[2], N, A+n+N*l, N*L);
chebyshev_normalization_3d(A, N, L, M);
}

void ft_execute_cheb2tet(const ft_harmonic_plan * P, double * A, const int N, const int L, const int M) {
chebyshev_normalization_3d_t(A, N, L, M);
if ((P->gamma != -0.5) || (P->delta != -0.5))
for (int n = 0; n < N; n++)
for (int l = 0; l < L; l++)
cblas_dtrmv(CblasColMajor, CblasUpper, CblasTrans, CblasNonUnit, N, P->Pinv[2], N, A+n+N*l, N*L);
if ((P->beta != -0.5) || (P->gamma + P->delta != -1.5))
for (int m = 0; m < M; m++)
cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, N, L, 1.0, P->Pinv[1], N, A+N*L*m, N);
if ((P->alpha != -0.5) || (P->beta + P->gamma + P->delta != -2.5))
for (int m = 0; m < M; m++)
cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, L, 1.0, P->Pinv[0], N, A+N*L*m, N);
execute_tet_lo2hi_AVX(P->RP[0], P->RP[1], A, P->B, L, M);
void ft_execute_tet2cheb(const char TRANS, const ft_harmonic_plan * P, double * A, const int N, const int L, const int M) {
if (TRANS == 'N') {
execute_tet_hi2lo_AVX(P->RP[0], P->RP[1], A, P->B, L, M);
if ((P->beta + P->gamma + P->delta != -2.5) || (P->alpha != -0.5))
for (int m = 0; m < M; m++)
cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, L, 1.0, P->P[0], N, A+N*L*m, N);
if ((P->gamma + P->delta != -1.5) || (P->beta != -0.5))
for (int m = 0; m < M; m++)
cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, N, L, 1.0, P->P[1], N, A+N*L*m, N);
if ((P->delta != -0.5) || (P->gamma != -0.5))
for (int n = 0; n < N; n++)
for (int l = 0; l < L; l++)
cblas_dtrmv(CblasColMajor, CblasUpper, CblasTrans, CblasNonUnit, N, P->P[2], N, A+n+N*l, N*L);
chebyshev_normalization_3d(A, N, L, M);
}
else if (TRANS == 'T') {
chebyshev_normalization_3d(A, N, L, M);
if ((P->gamma != -0.5) || (P->delta != -0.5))
for (int n = 0; n < N; n++)
for (int l = 0; l < L; l++)
cblas_dtrmv(CblasColMajor, CblasUpper, CblasNoTrans, CblasNonUnit, N, P->P[2], N, A+n+N*l, N*L);
if ((P->beta != -0.5) || (P->gamma + P->delta != -1.5))
for (int m = 0; m < M; m++)
cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, N, L, 1.0, P->P[1], N, A+N*L*m, N);
if ((P->alpha != -0.5) || (P->beta + P->gamma + P->delta != -2.5))
for (int m = 0; m < M; m++)
cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, N, L, 1.0, P->P[0], N, A+N*L*m, N);
execute_tet_lo2hi_AVX(P->RP[0], P->RP[1], A, P->B, L, M);
}
}

void ft_execute_cheb2tet(const char TRANS, const ft_harmonic_plan * P, double * A, const int N, const int L, const int M) {
if (TRANS == 'N') {
chebyshev_normalization_3d_t(A, N, L, M);
if ((P->gamma != -0.5) || (P->delta != -0.5))
for (int n = 0; n < N; n++)
for (int l = 0; l < L; l++)
cblas_dtrmv(CblasColMajor, CblasUpper, CblasTrans, CblasNonUnit, N, P->Pinv[2], N, A+n+N*l, N*L);
if ((P->beta != -0.5) || (P->gamma + P->delta != -1.5))
for (int m = 0; m < M; m++)
cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, N, L, 1.0, P->Pinv[1], N, A+N*L*m, N);
if ((P->alpha != -0.5) || (P->beta + P->gamma + P->delta != -2.5))
for (int m = 0; m < M; m++)
cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, L, 1.0, P->Pinv[0], N, A+N*L*m, N);
execute_tet_lo2hi_AVX(P->RP[0], P->RP[1], A, P->B, L, M);
}
else if (TRANS == 'T') {
execute_tet_hi2lo_AVX(P->RP[0], P->RP[1], A, P->B, L, M);
if ((P->beta + P->gamma + P->delta != -2.5) || (P->alpha != -0.5))
for (int m = 0; m < M; m++)
cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, N, L, 1.0, P->Pinv[0], N, A+N*L*m, N);
if ((P->gamma + P->delta != -1.5) || (P->beta != -0.5))
for (int m = 0; m < M; m++)
cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, N, L, 1.0, P->Pinv[1], N, A+N*L*m, N);
if ((P->delta != -0.5) || (P->gamma != -0.5))
for (int n = 0; n < N; n++)
for (int l = 0; l < L; l++)
cblas_dtrmv(CblasColMajor, CblasUpper, CblasNoTrans, CblasNonUnit, N, P->Pinv[2], N, A+n+N*l, N*L);
chebyshev_normalization_3d_t(A, N, L, M);
}
}

void ft_destroy_spin_harmonic_plan(ft_spin_harmonic_plan * P) {
Expand Down
4 changes: 2 additions & 2 deletions src/fasttransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,9 +381,9 @@ void ft_execute_cheb2rectdisk(const char TRANS, const ft_harmonic_plan * P, doub
ft_harmonic_plan * ft_plan_tet2cheb(const int n, const double alpha, const double beta, const double gamma, const double delta);

/// Transform a tetrahedral harmonic expansion to a trivariate Chebyshev series.
void ft_execute_tet2cheb(const ft_harmonic_plan * P, double * A, const int N, const int L, const int M);
void ft_execute_tet2cheb(const char TRANS, const ft_harmonic_plan * P, double * A, const int N, const int L, const int M);
/// Transform a trivariate Chebyshev series to a tetrahedral harmonic expansion.
void ft_execute_cheb2tet(const ft_harmonic_plan * P, double * A, const int N, const int L, const int M);
void ft_execute_cheb2tet(const char TRANS, const ft_harmonic_plan * P, double * A, const int N, const int L, const int M);

/// Data structure to store a \ref ft_spin_rotation_plan, and various arrays to represent 1D orthogonal polynomial transforms.
typedef struct {
Expand Down
18 changes: 14 additions & 4 deletions test/test_drivers.c
Original file line number Diff line number Diff line change
Expand Up @@ -1232,8 +1232,8 @@ int main(int argc, const char * argv[]) {
B = copymat(A, N, L*M);
P = ft_plan_tet2cheb(N, alpha, beta, gamma, delta);

ft_execute_tet2cheb(P, A, N, L, M);
ft_execute_cheb2tet(P, A, N, L, M);
ft_execute_tet2cheb('N', P, A, N, L, M);
ft_execute_cheb2tet('N', P, A, N, L, M);

err = ft_norm_2arg(A, B, N*L*M)/ft_norm_1arg(B, N*L*M);
printf("ϵ_2 \t\t (N×L×M) = (%5ix%5i×%5i): \t |%20.2e ", N, L, M, err);
Expand All @@ -1242,6 +1242,16 @@ int main(int argc, const char * argv[]) {
printf("ϵ_∞ \t\t (N×L×M) = (%5ix%5i×%5i): \t |%20.2e ", N, L, M, err);
ft_checktest(err, 4*pow(N*L*M, 2.0/3.0), &checksum);

ft_execute_tet2cheb('T', P, A, N, L, M);
ft_execute_cheb2tet('T', P, A, N, L, M);

err = ft_norm_2arg(A, B, N*L*M)/ft_norm_1arg(B, N*L*M);
printf("ϵ_2 tranposed \t (N×L×M) = (%5ix%5i×%5i): \t |%20.2e ", N, L, M, err);
ft_checktest(err, 64*pow(N*L*M, 2.0/3.0), &checksum);
err = ft_normInf_2arg(A, B, N*L*M)/ft_normInf_1arg(B, N*L*M);
printf("ϵ_∞ tranposed \t (N×L×M) = (%5ix%5i×%5i): \t |%20.2e ", N, L, M, err);
ft_checktest(err, 32*pow(N*L*M, 3.0/3.0), &checksum);

free(A);
free(B);
ft_destroy_harmonic_plan(P);
Expand All @@ -1257,10 +1267,10 @@ int main(int argc, const char * argv[]) {
A = tetrand(N, L, M);
P = ft_plan_tet2cheb(N, alpha, beta, gamma, delta);

FT_TIME(ft_execute_tet2cheb(P, A, N, L, M), start, end, NTIMES)
FT_TIME(ft_execute_tet2cheb('N', P, A, N, L, M), start, end, NTIMES)
printf("%d %.6f", N, elapsed(&start, &end, NTIMES));

FT_TIME(ft_execute_cheb2tet(P, A, N, L, M), start, end, NTIMES)
FT_TIME(ft_execute_cheb2tet('N', P, A, N, L, M), start, end, NTIMES)
printf(" %.6f", elapsed(&start, &end, NTIMES));

printf("\n");
Expand Down
8 changes: 4 additions & 4 deletions test/test_fftw.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,10 +344,10 @@ int main(int argc, const char * argv[]) {
TS = ft_plan_tet_synthesis(N, L, M);
TA = ft_plan_tet_analysis(N, L, M);

ft_execute_tet2cheb(P, A, N, L, M);
ft_execute_tet2cheb('N', P, A, N, L, M);
ft_execute_tet_synthesis(TS, A, N, L, M);
ft_execute_tet_analysis(TA, A, N, L, M);
ft_execute_cheb2tet(P, A, N, L, M);
ft_execute_cheb2tet('N', P, A, N, L, M);

err = ft_norm_2arg(A, B, N*L*M)/ft_norm_1arg(B, N*L*M);
printf("ϵ_2 \t\t (N×L×M) = (%5ix%5i×%5i): \t |%20.2e ", N, L, M, err);
Expand Down Expand Up @@ -375,10 +375,10 @@ int main(int argc, const char * argv[]) {
TS = ft_plan_tet_synthesis(N, L, M);
TA = ft_plan_tet_analysis(N, L, M);

FT_TIME({ft_execute_tet2cheb(P, A, N, L, M); ft_execute_tet_synthesis(TS, A, N, L, M);}, start, end, NTIMES)
FT_TIME({ft_execute_tet2cheb('N', P, A, N, L, M); ft_execute_tet_synthesis(TS, A, N, L, M);}, start, end, NTIMES)
printf("%d %.6f", N, elapsed(&start, &end, NTIMES));

FT_TIME({ft_execute_tet_analysis(TA, A, N, L, M); ft_execute_cheb2tet(P, A, N, L, M);}, start, end, NTIMES)
FT_TIME({ft_execute_tet_analysis(TA, A, N, L, M); ft_execute_cheb2tet('N', P, A, N, L, M);}, start, end, NTIMES)
printf(" %.6f\n", elapsed(&start, &end, NTIMES));

free(A);
Expand Down

0 comments on commit 081adf2

Please sign in to comment.