From eb788fb684300f281936c951368729f84913a462 Mon Sep 17 00:00:00 2001 From: "saru@southpole" Date: Tue, 19 Nov 2024 18:24:27 -0800 Subject: [PATCH 1/4] added decimation technique, verified with all_around_metal --- Source/Code_Definitions.H | 1 + Source/Solver/Transport/NEGF/CNT.H | 7 +- Source/Solver/Transport/NEGF/CNT.cpp | 59 ++- Source/Solver/Transport/NEGF/Graphene.H | 4 +- Source/Solver/Transport/NEGF/Matrix_Block.H | 45 +- .../Solver/Transport/NEGF/Matrix_Block_Util.H | 490 +++++++++++------- Source/Solver/Transport/NEGF/NEGF_Common.H | 33 +- Source/Solver/Transport/NEGF/NEGF_Common.cpp | 166 +++++- Source/Solver/Transport/Transport.cpp | 7 +- Source/Utils/CodeUtils/MathLib.H | 18 + Source/Utils/CodeUtils/MathLib.cpp | 195 +++++++ Source/Utils/CodeUtils/cudaErrorCheck.H | 18 + .../decimation_technique/Source/main.cpp | 10 +- 13 files changed, 821 insertions(+), 232 deletions(-) create mode 100644 Source/Utils/CodeUtils/MathLib.H create mode 100644 Source/Utils/CodeUtils/MathLib.cpp create mode 100644 Source/Utils/CodeUtils/cudaErrorCheck.H diff --git a/Source/Code_Definitions.H b/Source/Code_Definitions.H index d051ace..d446473 100644 --- a/Source/Code_Definitions.H +++ b/Source/Code_Definitions.H @@ -6,6 +6,7 @@ #define VFRAC_THREASHOLD 1e-5 #define NUM_MODES 1 +#define BLOCK_SIZE 1 #define NUM_CONTACTS 2 #define NUM_ENERGY_PTS_REAL 10 diff --git a/Source/Solver/Transport/NEGF/CNT.H b/Source/Solver/Transport/NEGF/CNT.H index 94ed50d..f354852 100644 --- a/Source/Solver/Transport/NEGF/CNT.H +++ b/Source/Solver/Transport/NEGF/CNT.H @@ -10,9 +10,9 @@ #include "NEGF_Common.H" -class c_CNT : public c_NEGF_Common +class c_CNT : public c_NEGF_Common { - using BlkType = ComplexType[NUM_MODES]; + using BlkType = ComplexType[BLOCK_SIZE]; amrex::Real acc = 1.42e-10; amrex::Real gamma = 2.5; @@ -75,7 +75,8 @@ class c_CNT : public c_NEGF_Common virtual int get_offDiag_repeatBlkSize() final { return 2; } virtual AMREX_GPU_HOST_DEVICE void Compute_SurfaceGreensFunction( - MatrixBlock &gr, const ComplexType EmU) final; + MatrixBlock &gr, const ComplexType E, + const ComplexType U) final; public: static int get_1D_site_id(int par_id_local_to_NS) diff --git a/Source/Solver/Transport/NEGF/CNT.cpp b/Source/Solver/Transport/NEGF/CNT.cpp index 5618575..5917371 100644 --- a/Source/Solver/Transport/NEGF/CNT.cpp +++ b/Source/Solver/Transport/NEGF/CNT.cpp @@ -66,8 +66,8 @@ void c_CNT::Set_MaterialSpecificParameters() void c_CNT::Set_BlockDegeneracyVector(amrex::Vector &vec) { - vec.resize(NUM_MODES); - for (int m = 0; m < NUM_MODES; ++m) + vec.resize(BLOCK_SIZE); + for (int m = 0; m < BLOCK_SIZE; ++m) { vec[m] = mode_degen_vec[m]; } @@ -293,7 +293,8 @@ ComplexType c_CNT::get_beta(int J) void c_CNT::Define_MPI_BlkType() { - MPI_Type_vector(1, NUM_MODES, NUM_MODES, MPI_DOUBLE_COMPLEX, &MPI_BlkType); + MPI_Type_vector(1, BLOCK_SIZE, BLOCK_SIZE, MPI_DOUBLE_COMPLEX, + &MPI_BlkType); MPI_Type_commit(&MPI_BlkType); } @@ -310,7 +311,7 @@ void c_CNT::Construct_Hamiltonian() h_minusHa(i) = 0.; } - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { int J = mode_vec[j]; beta.block[j] = get_beta(J); @@ -352,32 +353,42 @@ void c_CNT::Define_ContactInfo() AMREX_GPU_HOST_DEVICE void c_CNT::Compute_SurfaceGreensFunction(MatrixBlock &gr, - const ComplexType EmU) + const ComplexType E, + const ComplexType U) { - auto EmU_sq = pow(EmU, 2.); - auto gamma_sq = pow(gamma, 2.); - - for (int i = 0; i < NUM_MODES; ++i) + if (use_decimation) + { + c_NEGF_Common::DecimationTechnique(gr, E - U); + } + else { - auto Factor = EmU_sq + gamma_sq - pow(beta.block[i], 2); + ComplexType EmU = E - U; + auto EmU_sq = pow(EmU, 2.); + auto gamma_sq = pow(gamma, 2.); - auto Sqrt = sqrt(pow(Factor, 2) - 4. * EmU_sq * gamma_sq); + for (int i = 0; i < BLOCK_SIZE; ++i) + { + auto Factor = EmU_sq + gamma_sq - pow(beta.block[i], 2); + + auto Sqrt = sqrt(pow(Factor, 2) - 4. * EmU_sq * gamma_sq); - auto Denom = 2. * gamma_sq * EmU; + auto Denom = 2. * gamma_sq * EmU; - auto val1 = (Factor + Sqrt) / Denom; - auto val2 = (Factor - Sqrt) / Denom; + auto val1 = (Factor + Sqrt) / Denom; + auto val2 = (Factor - Sqrt) / Denom; - if (val1.imag() < 0.) - gr.block[i] = val1; - else if (val2.imag() < 0.) - gr.block[i] = val2; + if (val1.imag() < 0.) + gr.block[i] = val1; + else if (val2.imag() < 0.) + gr.block[i] = val2; - // amrex::Print() << "EmU: " << EmU << "\n"; - // amrex::Print() << "Factor: " << Factor << "\n"; - // amrex::Print() << "Sqrt: " << Sqrt << "\n"; - // amrex::Print() << "Denom: " << Denom << "\n"; - // amrex::Print() << "Numerator: " << Factor+Sqrt << "\n"; - // amrex::Print() << "Value: " << (Factor+Sqrt)/Denom << "\n"; + // amrex::Print() << "EmU: " << EmU << "\n"; + // amrex::Print() << "Factor: " << Factor << "\n"; + // amrex::Print() << "Sqrt: " << Sqrt << "\n"; + // amrex::Print() << "Denom: " << Denom << "\n"; + // amrex::Print() << "Numerator: " << Factor+Sqrt << "\n"; + // amrex::Print() << "Value: " << (Factor+Sqrt)/Denom << "\n"; + } + // amrex::Print() << "Using quadratic, gr: " << gr << "\n"; } } diff --git a/Source/Solver/Transport/NEGF/Graphene.H b/Source/Solver/Transport/NEGF/Graphene.H index bf5e62c..7ea0a64 100644 --- a/Source/Solver/Transport/NEGF/Graphene.H +++ b/Source/Solver/Transport/NEGF/Graphene.H @@ -10,9 +10,9 @@ #include "NEGF_Common.H" -class c_Graphene : public c_NEGF_Common +class c_Graphene : public c_NEGF_Common { - using BlkType = ComplexType[NUM_MODES][NUM_MODES]; + using BlkType = ComplexType[BLOCK_SIZE][BLOCK_SIZE]; static amrex::Array type_id; diff --git a/Source/Solver/Transport/NEGF/Matrix_Block.H b/Source/Solver/Transport/NEGF/Matrix_Block.H index e5f8bc5..500d36b 100644 --- a/Source/Solver/Transport/NEGF/Matrix_Block.H +++ b/Source/Solver/Transport/NEGF/Matrix_Block.H @@ -21,6 +21,44 @@ struct MatrixBlock public: T block; + // Non-const .data() for 1D array + template + auto data() -> + typename std::enable_if::value == 1, + typename std::remove_extent::type *>::type + { + return &block[0]; + } + + // Const .data() for 1D array + template + auto const_data() const -> typename std::enable_if< + std::rank::value == 1, + const typename std::remove_extent::type *>::type + { + return &block[0]; + } + + // Non-const .data() for 2D array + template + auto data() -> typename std::enable_if< + std::rank::value == 2, + typename std::remove_all_extents::type *>::type + { + return reinterpret_cast::type *>( + &block[0][0]); + } + + // Const .data() for 2D array + template + auto const_data() const -> typename std::enable_if< + std::rank::value == 2, + const typename std::remove_all_extents::type *>::type + { + return reinterpret_cast< + const typename std::remove_all_extents::type *>(&block[0][0]); + } + AMREX_GPU_HOST_DEVICE MatrixBlock operator=(const amrex::Real c); AMREX_GPU_HOST_DEVICE MatrixBlock operator=(const ComplexType c); @@ -63,7 +101,11 @@ struct MatrixBlock AMREX_GPU_HOST_DEVICE MatrixBlock Conj() const; /*Conjugate*/ AMREX_GPU_HOST_DEVICE MatrixBlock Tran() const; /*Transpose*/ AMREX_GPU_HOST_DEVICE MatrixBlock Dagger() const; /*Conjugate-Transpose*/ - AMREX_GPU_HOST_DEVICE ComplexType DiagSum() const; /*Trace*/ + AMREX_GPU_HOST_DEVICE MatrixBlock Inverse() const; /*Inverse*/ + AMREX_GPU_HOST_DEVICE ComplexType DiagSum() const; /*Trace*/ + AMREX_GPU_HOST_DEVICE ComplexType FrobeniusNorm() const; + AMREX_GPU_HOST_DEVICE void SetDiag( + const ComplexType c); /*set diagonal element*/ template AMREX_GPU_HOST_DEVICE ComplexType @@ -75,6 +117,7 @@ struct MatrixBlock // MatrixBlock AtomicAdd(MatrixBlock& B) const; /*Atomic add for the // block*/ template MatrixBlock // Real() const; + // }; #endif diff --git a/Source/Solver/Transport/NEGF/Matrix_Block_Util.H b/Source/Solver/Transport/NEGF/Matrix_Block_Util.H index 951921d..cd4499e 100644 --- a/Source/Solver/Transport/NEGF/Matrix_Block_Util.H +++ b/Source/Solver/Transport/NEGF/Matrix_Block_Util.H @@ -1,3 +1,4 @@ +#include "MathLib.H" #include "Matrix_Block.H" /* Definitions */ @@ -9,10 +10,10 @@ MatrixBlock MatrixBlock::operator=(const ComplexType c_comp) } template <> -MatrixBlock -MatrixBlock::operator=(const ComplexType c_comp) +MatrixBlock +MatrixBlock::operator=(const ComplexType c_comp) { - for (int i = 0; i < NUM_MODES; ++i) + for (int i = 0; i < BLOCK_SIZE; ++i) { this->block[i] = c_comp; } @@ -20,13 +21,13 @@ MatrixBlock::operator=(const ComplexType c_comp) } template <> -MatrixBlock -MatrixBlock::operator=( +MatrixBlock +MatrixBlock::operator=( const ComplexType c_comp) { - for (int i = 0; i < NUM_MODES; ++i) + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { this->block[i][j] = c_comp; } @@ -43,8 +44,8 @@ MatrixBlock MatrixBlock::operator=(const amrex::Real c) } template <> -MatrixBlock -MatrixBlock::operator=(const amrex::Real c) +MatrixBlock +MatrixBlock::operator=(const amrex::Real c) { ComplexType c_complex(c, 0.); *this = c_complex; @@ -52,8 +53,8 @@ MatrixBlock::operator=(const amrex::Real c) } template <> -MatrixBlock -MatrixBlock::operator=(const amrex::Real c) +MatrixBlock +MatrixBlock::operator=(const amrex::Real c) { ComplexType c_complex(c, 0.); *this = c_complex; @@ -69,11 +70,11 @@ MatrixBlock MatrixBlock::operator*(const ComplexType c) } template <> -MatrixBlock -MatrixBlock::operator*(const ComplexType c) +MatrixBlock +MatrixBlock::operator*(const ComplexType c) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = this->block[i] * c; } @@ -82,13 +83,14 @@ MatrixBlock::operator*(const ComplexType c) } template <> -MatrixBlock -MatrixBlock::operator*(const ComplexType c) +MatrixBlock +MatrixBlock::operator*(const ComplexType c) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { result.block[i][j] = this->block[i][j] * c; } @@ -106,12 +108,12 @@ MatrixBlock MatrixBlock::operator*(const MatrixBlock &rhs) } template <> -MatrixBlock -MatrixBlock::operator*( - const MatrixBlock &rhs) +MatrixBlock +MatrixBlock::operator*( + const MatrixBlock &rhs) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = this->block[i] * rhs.block[i]; } @@ -119,18 +121,16 @@ MatrixBlock::operator*( } template <> -MatrixBlock -MatrixBlock::operator*( - const MatrixBlock &rhs) +MatrixBlock +MatrixBlock::operator*( + const MatrixBlock &rhs) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) - { - for (int j = 0; j < NUM_MODES; ++j) - { - result.block[i][j] = this->block[i][j] * rhs.block[i][j]; - } - } + MatrixBlock result; + + // MathLib::MatrixMatrixMultiply(result.data(), this->const_data(), + // rhs.const_data(), + // BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE); + return result; } @@ -143,11 +143,11 @@ MatrixBlock operator*(const ComplexType c_complex, const MatrixBlock &B) } template <> -MatrixBlock operator*( - const ComplexType c_complex, const MatrixBlock &B) +MatrixBlock operator*( + const ComplexType c_complex, const MatrixBlock &B) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = c_complex * B.block[i]; } @@ -155,14 +155,14 @@ MatrixBlock operator*( } template <> -MatrixBlock operator*( +MatrixBlock operator*( const ComplexType c_complex, - const MatrixBlock &B) + const MatrixBlock &B) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { result.block[i][j] = c_complex * B.block[i][j]; } @@ -179,17 +179,17 @@ MatrixBlock operator*(const amrex::Real c_real, const MatrixBlock &B) } template <> -MatrixBlock operator*( - const amrex::Real c_real, const MatrixBlock &B) +MatrixBlock operator*( + const amrex::Real c_real, const MatrixBlock &B) { ComplexType c_complex(c_real, 0.); return operator*(c_complex, B); } template <> -MatrixBlock operator*( +MatrixBlock operator*( const amrex::Real c_real, - const MatrixBlock &B) + const MatrixBlock &B) { ComplexType c_complex(c_real, 0.); return operator*(c_complex, B); @@ -204,11 +204,11 @@ MatrixBlock MatrixBlock::operator+(const ComplexType c) } template <> -MatrixBlock -MatrixBlock::operator+(const ComplexType c) +MatrixBlock +MatrixBlock::operator+(const ComplexType c) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = this->block[i] + c; } @@ -225,12 +225,12 @@ MatrixBlock MatrixBlock::operator+(const MatrixBlock &rhs) } template <> -MatrixBlock -MatrixBlock::operator+( - const MatrixBlock &rhs) +MatrixBlock +MatrixBlock::operator+( + const MatrixBlock &rhs) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = this->block[i] + rhs.block[i]; } @@ -238,14 +238,14 @@ MatrixBlock::operator+( } template <> -MatrixBlock -MatrixBlock::operator+( - const MatrixBlock &rhs) +MatrixBlock +MatrixBlock::operator+( + const MatrixBlock &rhs) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { result.block[i][j] = this->block[i][j] + rhs.block[i][j]; } @@ -262,11 +262,11 @@ MatrixBlock operator+(const ComplexType c, const MatrixBlock &rhs) } template <> -MatrixBlock operator+( - const ComplexType c, const MatrixBlock &rhs) +MatrixBlock operator+( + const ComplexType c, const MatrixBlock &rhs) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = c + rhs.block[i]; } @@ -274,14 +274,14 @@ MatrixBlock operator+( } template <> -MatrixBlock operator+( +MatrixBlock operator+( const ComplexType c, - const MatrixBlock &rhs) + const MatrixBlock &rhs) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { result.block[i][j] = c + rhs.block[i][j]; } @@ -298,11 +298,11 @@ MatrixBlock MatrixBlock::operator-(const amrex::Real c) } template <> -MatrixBlock -MatrixBlock::operator-(const amrex::Real c) +MatrixBlock +MatrixBlock::operator-(const amrex::Real c) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = this->block[i] - c; } @@ -319,11 +319,11 @@ MatrixBlock MatrixBlock::operator-(const ComplexType c) } template <> -MatrixBlock -MatrixBlock::operator-(const ComplexType c) +MatrixBlock +MatrixBlock::operator-(const ComplexType c) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = this->block[i] - c; } @@ -340,12 +340,12 @@ MatrixBlock MatrixBlock::operator-(const MatrixBlock &rhs) } template <> -MatrixBlock -MatrixBlock::operator-( - const MatrixBlock &rhs) +MatrixBlock +MatrixBlock::operator-( + const MatrixBlock &rhs) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = this->block[i] - rhs.block[i]; } @@ -361,11 +361,11 @@ MatrixBlock operator-(const ComplexType c, const MatrixBlock &rhs) } template <> -MatrixBlock operator-( - const ComplexType c, const MatrixBlock &rhs) +MatrixBlock operator-( + const ComplexType c, const MatrixBlock &rhs) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = c - rhs.block[i]; } @@ -381,12 +381,12 @@ MatrixBlock operator-(const MatrixBlock &B, const MatrixBlock &C) } template <> -MatrixBlock operator-( - const MatrixBlock &B, - const MatrixBlock &C) +MatrixBlock operator-( + const MatrixBlock &B, + const MatrixBlock &C) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = B.block[i] - C.block[i]; } @@ -394,14 +394,14 @@ MatrixBlock operator-( } template <> -MatrixBlock operator-( - const MatrixBlock &B, - const MatrixBlock &C) +MatrixBlock operator-( + const MatrixBlock &B, + const MatrixBlock &C) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { result.block[i][j] = B.block[i][j] - C.block[i][j]; } @@ -409,6 +409,34 @@ MatrixBlock operator-( return result; } +/*Inverse*/ +template +MatrixBlock MatrixBlock::Inverse() const +{ + return 1. / (*this); +} + +template <> +MatrixBlock +MatrixBlock::Inverse() const +{ + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) + { + result.block[i] = 1. / this->block[i]; + } + return result; +} + +template <> +MatrixBlock +MatrixBlock::Inverse() const +{ + MatrixBlock result; + // MathLib::DenseMatrixInversion(d_result.p, d_A.p, A_rows, A_cols); + return result; +} + /* Operation [R] = [B] / c_complex */ template MatrixBlock MatrixBlock::operator/(ComplexType c) @@ -418,11 +446,11 @@ MatrixBlock MatrixBlock::operator/(ComplexType c) } template <> -MatrixBlock -MatrixBlock::operator/(ComplexType c) +MatrixBlock +MatrixBlock::operator/(ComplexType c) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = this->block[i] / c; } @@ -439,18 +467,30 @@ MatrixBlock MatrixBlock::operator/(const MatrixBlock &rhs) } template <> -MatrixBlock -MatrixBlock::operator/( - const MatrixBlock &rhs) +MatrixBlock +MatrixBlock::operator/( + const MatrixBlock &rhs) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = this->block[i] / rhs.block[i]; } return result; } +template <> +MatrixBlock +MatrixBlock::operator/( + const MatrixBlock &rhs) +{ + MatrixBlock result = + (*this) * rhs.Inverse(); + // MathLib::DenseMatrixInversion(rhs_inverse.p, rhs.p, A_rows, A_cols); + + return result; +} + /* Operation [R] = c_complex / [C] */ template MatrixBlock operator/(const ComplexType c_complex, const MatrixBlock &C) @@ -460,11 +500,11 @@ MatrixBlock operator/(const ComplexType c_complex, const MatrixBlock &C) } template <> -MatrixBlock operator/( - const ComplexType c_complex, const MatrixBlock &C) +MatrixBlock operator/( + const ComplexType c_complex, const MatrixBlock &C) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { result.block[i] = c_complex / C.block[i]; } @@ -472,14 +512,14 @@ MatrixBlock operator/( } template <> -MatrixBlock operator/( +MatrixBlock operator/( const ComplexType c_complex, - const MatrixBlock &C) + const MatrixBlock &C) { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { result.block[i][j] = c_complex / C.block[i][j]; } @@ -496,12 +536,12 @@ std::ostream &operator<<(std::ostream &stream, const MatrixBlock &rhs) template <> std::ostream &operator<<(std::ostream &stream, - const MatrixBlock &rhs) + const MatrixBlock &rhs) { stream << "{"; - for (int i = 0; i < NUM_MODES; ++i) + for (int i = 0; i < BLOCK_SIZE; ++i) { - if (i != NUM_MODES - 1) + if (i != BLOCK_SIZE - 1) { stream << rhs.block[i] << ", "; } @@ -516,12 +556,12 @@ std::ostream &operator<<(std::ostream &stream, template <> std::ostream &operator<<(std::ostream &stream, - const MatrixBlock &rhs) + const MatrixBlock &rhs) { stream << "["; - for (int i = 0; i < NUM_MODES; ++i) + for (int i = 0; i < BLOCK_SIZE; ++i) { - if (i != NUM_MODES - 1) + if (i != BLOCK_SIZE - 1) { stream << rhs.block[i] << ", "; } @@ -537,21 +577,22 @@ std::ostream &operator<<(std::ostream &stream, template <> std::ostream &operator<<( std::ostream &stream, - const MatrixBlock &rhs) + const MatrixBlock &rhs) { stream << "["; - for (int i = 0; i < NUM_MODES; ++i) + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { stream << std::setw(10) << rhs.block[i][j]; } - if (i != NUM_MODES - 1) stream << "\n "; + if (i != BLOCK_SIZE - 1) stream << "\n "; } stream << "]"; return stream; } +/*Conjugate*/ template MatrixBlock MatrixBlock::Conj() const { @@ -560,11 +601,11 @@ MatrixBlock MatrixBlock::Conj() const } template <> -MatrixBlock MatrixBlock::Conj() - const +MatrixBlock +MatrixBlock::Conj() const { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { amrex::GpuComplex X_conj(this->block[i].real(), -1. * this->block[i].imag()); @@ -574,13 +615,13 @@ MatrixBlock MatrixBlock::Conj() } template <> -MatrixBlock -MatrixBlock::Conj() const +MatrixBlock +MatrixBlock::Conj() const { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { amrex::GpuComplex X_conj(this->block[i][j].real(), -1. * this->block[i][j].imag()); @@ -590,6 +631,7 @@ MatrixBlock::Conj() const return result; } +/*Transpose*/ template MatrixBlock MatrixBlock::Tran() const { @@ -597,20 +639,20 @@ MatrixBlock MatrixBlock::Tran() const } template <> -MatrixBlock MatrixBlock::Tran() - const +MatrixBlock +MatrixBlock::Tran() const { return *this; } template <> -MatrixBlock -MatrixBlock::Tran() const +MatrixBlock +MatrixBlock::Tran() const { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { if (i == j) { @@ -625,6 +667,7 @@ MatrixBlock::Tran() const return result; } +/*Conjugate-transpose*/ template MatrixBlock MatrixBlock::Dagger() const { @@ -632,17 +675,20 @@ MatrixBlock MatrixBlock::Dagger() const } template <> -MatrixBlock -MatrixBlock::Dagger() const +MatrixBlock +MatrixBlock::Dagger() const { return this->Conj().Tran(); } template <> -MatrixBlock -MatrixBlock::Dagger() const +MatrixBlock +MatrixBlock::Dagger() const { - return this->Conj().Tran(); + MatrixBlock result; + // MathLib::ConjugateTranspose(result.p, this->block.p, BLOCK_SIZE, + // BLOCK_SIZE); + return result; } ///* Operation AtomicAdd */ @@ -654,11 +700,11 @@ MatrixBlock::Dagger() const // } // // template <> -// MatrixBlock -// MatrixBlock::AtomicAdd(MatrixBlock& B) const +// MatrixBlock +// MatrixBlock::AtomicAdd(MatrixBlock& B) const //{ // -// for (int i = 0; i < NUM_MODES; ++i) +// for (int i = 0; i < BLOCK_SIZE; ++i) // { // amrex::HostDevice::Atomic::Add(&(this->trace_r.block[i]), // B.block[i].real()); @@ -669,14 +715,14 @@ MatrixBlock::Dagger() const // } // // template <> -// MatrixBlock -// MatrixBlock::AtomicAdd(MatrixBlock& B) -// const +// MatrixBlock +// MatrixBlock::AtomicAdd(MatrixBlock& +// B) const //{ -// MatrixBlock result; -// for (int i = 0; i < NUM_MODES; ++i) +// MatrixBlock result; +// for (int i = 0; i < BLOCK_SIZE; ++i) // { -// for (int j = 0; j < NUM_MODES; ++j) +// for (int j = 0; j < BLOCK_SIZE; ++j) // { // if(i==j) // { @@ -701,12 +747,12 @@ MatrixBlock::Dagger() const // } // // template <> -// MatrixBlock -// MatrixBlock::Real() const +// MatrixBlock +// MatrixBlock::Real() const //{ -// MatrixBlock result; +// MatrixBlock result; // -// for (int i = 0; i < NUM_MODES; ++i) +// for (int i = 0; i < BLOCK_SIZE; ++i) // { // result.block[i] = this->block[i].real(); // } @@ -714,14 +760,14 @@ MatrixBlock::Dagger() const // } // // template <> -// MatrixBlock -// MatrixBlock::Real() const +// MatrixBlock +// MatrixBlock::Real() const //{ -// MatrixBlock result; +// MatrixBlock result; // -// for (int i = 0; i < NUM_MODES; ++i) +// for (int i = 0; i < BLOCK_SIZE; ++i) // { -// for (int j = 0; j < NUM_MODES; ++j) +// for (int j = 0; j < BLOCK_SIZE; ++j) // { // result.block[i][j] = this->block[i][j].real(); // } @@ -738,10 +784,10 @@ ComplexType MatrixBlock::DiagSum() const } template <> -ComplexType MatrixBlock::DiagSum() const +ComplexType MatrixBlock::DiagSum() const { ComplexType result(0., 0.); - for (int i = 0; i < NUM_MODES; ++i) + for (int i = 0; i < BLOCK_SIZE; ++i) { result += this->block[i]; } @@ -749,12 +795,12 @@ ComplexType MatrixBlock::DiagSum() const } template <> -ComplexType MatrixBlock::DiagSum() const +ComplexType MatrixBlock::DiagSum() const { ComplexType result(0., 0.); - for (int i = 0; i < NUM_MODES; ++i) + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { if (i == j) { @@ -765,6 +811,40 @@ ComplexType MatrixBlock::DiagSum() const return result; } +/* Operation Norm */ +template +ComplexType MatrixBlock::FrobeniusNorm() const +{ + ComplexType result(0., 0.); + return *this; +} + +template <> +ComplexType MatrixBlock::FrobeniusNorm() const +{ + ComplexType result(0., 0.); + for (int i = 0; i < BLOCK_SIZE; ++i) + { + result += this->block[i] * this->block[i]; + } + return amrex::sqrt(result); +} + +template <> +ComplexType MatrixBlock::FrobeniusNorm() + const +{ + ComplexType result(0., 0.); + for (int i = 0; i < BLOCK_SIZE; ++i) + { + for (int j = 0; j < BLOCK_SIZE; ++j) + { + result += this->block[i][j] * this->block[i][j]; + } + } + return amrex::sqrt(result); +} + ///* Operation DiagDotSum */ template template @@ -776,10 +856,10 @@ ComplexType MatrixBlock::DiagDotSum(U &vec) const template <> template -ComplexType MatrixBlock::DiagDotSum(U &vec) const +ComplexType MatrixBlock::DiagDotSum(U &vec) const { ComplexType result(0., 0.); - for (int i = 0; i < NUM_MODES; ++i) + for (int i = 0; i < BLOCK_SIZE; ++i) { ComplexType factor(vec[i], 0.); result += factor * this->block[i]; @@ -789,13 +869,13 @@ ComplexType MatrixBlock::DiagDotSum(U &vec) const template <> template -ComplexType MatrixBlock::DiagDotSum( +ComplexType MatrixBlock::DiagDotSum( U &vec) const { ComplexType result(0., 0.); - for (int i = 0; i < NUM_MODES; ++i) + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { if (i == j) { @@ -817,11 +897,11 @@ MatrixBlock MatrixBlock::DiagMult(U &vec) const template <> template -MatrixBlock -MatrixBlock::DiagMult(U &vec) const +MatrixBlock +MatrixBlock::DiagMult(U &vec) const { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { ComplexType factor(vec[i], 0.); result.block[i] = factor * this->block[i]; @@ -831,13 +911,13 @@ MatrixBlock::DiagMult(U &vec) const template <> template -MatrixBlock -MatrixBlock::DiagMult(U &vec) const +MatrixBlock +MatrixBlock::DiagMult(U &vec) const { - MatrixBlock result; - for (int i = 0; i < NUM_MODES; ++i) + MatrixBlock result; + for (int i = 0; i < BLOCK_SIZE; ++i) { - for (int j = 0; j < NUM_MODES; ++j) + for (int j = 0; j < BLOCK_SIZE; ++j) { if (i == j) { @@ -848,3 +928,39 @@ MatrixBlock::DiagMult(U &vec) const } return result; } + +///* Operation DiagSet */ +template +void MatrixBlock::SetDiag(const ComplexType c_comp) +{ +} + +template <> +void MatrixBlock::SetDiag(const ComplexType c_comp) +{ + for (int i = 0; i < BLOCK_SIZE; ++i) + { + this->block[i] = c_comp; + } +} + +template <> +void MatrixBlock::SetDiag( + const ComplexType c_comp) +{ + ComplexType zero(0., 0.); + for (int i = 0; i < BLOCK_SIZE; ++i) + { + for (int j = 0; j < BLOCK_SIZE; ++j) + { + if (i == j) + { + this->block[i][i] = c_comp; + } + else + { + this->block[i][i] = zero; + } + } + } +} diff --git a/Source/Solver/Transport/NEGF/NEGF_Common.H b/Source/Solver/Transport/NEGF/NEGF_Common.H index fa009f7..059c2a4 100644 --- a/Source/Solver/Transport/NEGF/NEGF_Common.H +++ b/Source/Solver/Transport/NEGF/NEGF_Common.H @@ -158,6 +158,18 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath amrex::Gpu::DeviceVector d_Trace_i; #endif + /*Condensed Hamiltonian*/ + struct CondensedHamiltonian + { + MatrixBlock Xi_s; + MatrixBlock Xi; + MatrixBlock Pi; + + CondensedHamiltonian() + : Xi_s(), Xi(), Pi() {} // default initialized + // calling constructor of MatrixBlock + }; + void Set_KeyParams(const std::string &NS_name_str, const int &NS_id_counter, const amrex::Real &NS_initial_deposit_value); void Define_FoldersAndFiles(const std::string &negf_foldername_str); @@ -181,6 +193,7 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath void Read_IntegrandWritingParams(amrex::ParmParse &); void Read_AtomLocationAndChargeDistributionFilename(amrex::ParmParse &); void Read_RecursiveOptimizationParams(amrex::ParmParse &); + void Read_DecimationTechniqueParams(amrex::ParmParse &); void Assert_Reads(); void Assert_KeyParameters(); @@ -312,7 +325,14 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath virtual void Define_IntegrationPaths(); virtual AMREX_GPU_HOST_DEVICE void Compute_SurfaceGreensFunction( - MatrixBlock &gr, const ComplexType EmU) = 0; + MatrixBlock &gr, const ComplexType E, const ComplexType U); + + /*Decimation*/ + virtual AMREX_GPU_HOST_DEVICE void Compute_CondensedHamiltonian( + CondensedHamiltonian &CondH, const ComplexType EmU); + + AMREX_GPU_HOST_DEVICE void DecimationTechnique(MatrixBlock &gr, + const ComplexType EmU); virtual void Write_Eql_Characteristics( const amrex::Vector E_vec, const RealTable1D &DOS_data, @@ -447,6 +467,17 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath RealTable1D d_NonEq_Integrand_Drain_data; #endif + /*Decimation Technique*/ + int use_decimation = + false; // whether to use decimation technique in derived class. + // if Compute_SurfaceGreensFunction method is not overriden + // then the default is decimation regardless of what we set + // use_decimation as. + + int decimation_max_iter = 30; // maximum iterations allowed + int decimation_rel_error = 1.e-5; // relative error + int decimation_layers = 10; // P + public: /*MPI Params*/ amrex::Vector MPI_recv_count; diff --git a/Source/Solver/Transport/NEGF/NEGF_Common.cpp b/Source/Solver/Transport/NEGF/NEGF_Common.cpp index c8f3225..2dcfa9f 100644 --- a/Source/Solver/Transport/NEGF/NEGF_Common.cpp +++ b/Source/Solver/Transport/NEGF/NEGF_Common.cpp @@ -7,8 +7,9 @@ #include "Matrix_Block_Util.H" /*Explicit specializations*/ -template class c_NEGF_Common; // of c_CNT -template class c_NEGF_Common; // c_Graphene +template class c_NEGF_Common; // of c_CNT +template class c_NEGF_Common< + ComplexType[BLOCK_SIZE][BLOCK_SIZE]>; // c_Graphene const std::map map_strToAngleType = { {"d", AngleType::Degrees}, @@ -594,6 +595,15 @@ void c_NEGF_Common::Read_RecursiveOptimizationParams(amrex::ParmParse &pp_ns) queryWithParser(pp_ns, "num_recursive_parts", num_recursive_parts); } +template +void c_NEGF_Common::Read_DecimationTechniqueParams(amrex::ParmParse &pp_ns) +{ + queryWithParser(pp_ns, "use_decimation", use_decimation); + queryWithParser(pp_ns, "decimation_max_iter", decimation_max_iter); + queryWithParser(pp_ns, "decimation_rel_error", decimation_rel_error); + queryWithParser(pp_ns, "decimation_layers", decimation_layers); +} + template void c_NEGF_Common::Assert_Reads() { @@ -643,6 +653,7 @@ void c_NEGF_Common::Read_CommonData(amrex::ParmParse &pp) Read_WritingRelatedFlags(pp); Read_AtomLocationAndChargeDistributionFilename(pp); Read_RecursiveOptimizationParams(pp); + Read_DecimationTechniqueParams(pp); } template @@ -1307,6 +1318,81 @@ void c_NEGF_Common::Allocate_ArraysForHamiltonian() SetVal_Table1D(h_Hc_loc_data, zero); } +template +void c_NEGF_Common::Compute_CondensedHamiltonian(CondensedHamiltonian &CondH, + const ComplexType EmU) +{ + auto &Xi_s = CondH.Xi_s; + auto &Xi = CondH.Xi; + auto &Pi = CondH.Pi; + + int P = decimation_layers; + + BlkTable1D H_tilde_data({0}, {P - 1}); + auto const &H_tilde = H_tilde_data.table(); + auto const &Hb = h_Hb_loc_data.table(); + + MatrixBlock EmUI; + EmUI.SetDiag(EmU); // This is (E-U)I + + H_tilde(P - 1).SetDiag(1. / EmU); // This is (P-1) element of [(E-U)I]^-1 + + auto H_tilde_kP = H_tilde(P - 1); // This is H_tilde(P-1)(P-1) + + for (int k = P - 2; k >= 1; --k) + { + int j = k % offDiag_repeatBlkSize; + auto temp1 = EmUI - 1. * Hb(j) * H_tilde(k + 1) * Hb(j).Dagger(); + + H_tilde(k) = temp1.Inverse(); + + H_tilde_kP = H_tilde(k) * Hb(j) * H_tilde_kP; + } + // here H_tilde_kP is H_tilde_1P + + MatrixBlock C_tilde_kk = H_tilde(1); //==H_tilde_11 = C_tilde_11 + + for (int k = 2; k < P; ++k) + { + int j = (k - 1) % offDiag_repeatBlkSize; + C_tilde_kk = H_tilde(k) + H_tilde(k) * Hb(j).Dagger() * C_tilde_kk * + Hb(j) * H_tilde(k); + } + /* Here, C_tilde_kk = C_tilde_(P-1)(P-1) + * and, C_tilde_1P = H_tilde_1P = H_tilde_kP + * C_tilde_11 = H_tilde(1) + */ + + int id = (P - 1) % offDiag_repeatBlkSize; + Xi_s = Hb(0) * H_tilde(1) * Hb(0).Dagger(); + Xi = Xi_s + Hb(id) * C_tilde_kk * Hb(id).Dagger(); + Pi = Hb(0) * H_tilde_kP * Hb(id); + + /* For P=2, as an example, + * + * Xi_s = Hb(0) * H_tilde(1) * Hb(0).Dagger(); + * Xi = Xi_s + Hb(1) * C_tilde_kk * Hb(1).Dagger(); + * Pi = Hb(0) * H_tilde_kP * Hb(1); + * + * H_tilde(1) = 1/(E-U) + * Hb(0) = beta + * Hb(1) = gamma + * C_tilde_kk = H_tilde(1) + * H_tilde_kP = H_tilde(1) + * + */ + + /* + amrex::Print() << " EmU: " << EmU << "\n"; + amrex::Print() << "Xi_s: " << Xi_s << "\n"; + amrex::Print() << " Xi: " << Xi << "\n"; + amrex::Print() << " Pi: " << Pi << "\n"; + amrex::Print() << " H_tilde(1): " << H_tilde(1) << "\n"; + amrex::Print() << " C_tilde_kk: " << C_tilde_kk << "\n"; + amrex::Print() << " H_tilde_kP: " << H_tilde_kP << "\n"; + */ +} + template void c_NEGF_Common::Allocate_ArraysForLeadSpecificQuantities() { @@ -3881,11 +3967,74 @@ void c_NEGF_Common::Compute_Rho0() Deallocate_TemporaryArraysForGFComputation(); } -// template -// AMREX_GPU_HOST_DEVICE -// void -// c_NEGF_Common:: Compute_SurfaceGreensFunction (MatrixBlock& gr, const -// ComplexType EmU) {} +template +AMREX_GPU_HOST_DEVICE void c_NEGF_Common::DecimationTechnique( + MatrixBlock &gr, const ComplexType EmU) +{ + CondensedHamiltonian CondH; + Compute_CondensedHamiltonian(CondH, EmU); + + MatrixBlock mu, nu, gamma, zeta; + + // initialize these blocks + { + MatrixBlock EmUI; + EmUI.SetDiag(EmU); + + auto mu0 = EmUI - CondH.Xi_s; + auto nu0 = EmUI - CondH.Xi; + auto gamma0 = CondH.Pi; + + auto nu0_inverse = nu0.Inverse(); + auto gamma0_dagger = gamma0.Dagger(); + + MatrixBlock temp1 = gamma0 * nu0_inverse; + MatrixBlock temp2 = temp1 * gamma0_dagger; + MatrixBlock temp3 = gamma0_dagger * nu0_inverse; + + mu = mu0 - temp2; + nu = nu0 - temp2 - temp3 * gamma0; + gamma = temp1 * gamma0; + zeta = temp3 * gamma0_dagger; + } + + amrex::Real error = 1; + int i = 0; + + while (error > decimation_rel_error) + { + auto mu_old = mu; + + auto nu_inverse = nu.Inverse(); + auto temp1 = zeta * nu_inverse * gamma; + auto temp2 = gamma * nu_inverse * zeta; + + mu = mu - temp1; + nu = nu - temp1 - temp2; + gamma = gamma * nu_inverse * gamma; + zeta = zeta * nu_inverse * zeta; + + auto sum = mu + mu_old; + auto error_mat = (mu - mu_old) * sum.Inverse(); + + error = sqrt(amrex::norm(error_mat.FrobeniusNorm())); + if (i > decimation_max_iter) break; + + i++; + } + // amrex::Print() << "decimation iterations: " << i << " error: " << error + // << "\n"; + + gr = mu.Inverse(); +} + +template +AMREX_GPU_HOST_DEVICE void c_NEGF_Common::Compute_SurfaceGreensFunction( + MatrixBlock &gr, const ComplexType E, ComplexType U) +{ + DecimationTechnique(gr, E - U); + // amrex::Print() << "Using decimation, gr: " << gr << "\n"; +} template void c_NEGF_Common::get_Sigma_at_contacts(BlkTable1D &h_Sigma_contact_data, @@ -3897,8 +4046,7 @@ void c_NEGF_Common::get_Sigma_at_contacts(BlkTable1D &h_Sigma_contact_data, for (std::size_t c = 0; c < NUM_CONTACTS; ++c) { MatrixBlock gr; - Compute_SurfaceGreensFunction(gr, E - U_contact[c]); - // amrex::Print() << "c, E, gr: " << c << " " << E << " " << gr << "\n"; + Compute_SurfaceGreensFunction(gr, E, U_contact[c]); h_Sigma(c) = h_tau(c) * gr * h_tau(c).Dagger(); } } diff --git a/Source/Solver/Transport/Transport.cpp b/Source/Solver/Transport/Transport.cpp index 3787f98..3a5c384 100644 --- a/Source/Solver/Transport/Transport.cpp +++ b/Source/Solver/Transport/Transport.cpp @@ -79,7 +79,12 @@ void c_TransportSolver::ReadData() Read_NSTypes(pp_transport); - Read_GatherAndDepositFields(pp_transport); + // ComplexType EmU = E - U; + auto &rCode = c_Code::GetInstance(); + if (rCode.use_electrostatic) + { + Read_GatherAndDepositFields(pp_transport); + } Read_SelfConsistencyInput(pp_transport); diff --git a/Source/Utils/CodeUtils/MathLib.H b/Source/Utils/CodeUtils/MathLib.H new file mode 100644 index 0000000..087093c --- /dev/null +++ b/Source/Utils/CodeUtils/MathLib.H @@ -0,0 +1,18 @@ +#include "../../Code_Definitions.H" + +namespace MathLib +{ +void MatrixMatrixMultiply(ComplexType* d_C, const ComplexType* d_A, + const ComplexType* d_B, unsigned int A_rows, + unsigned int A_cols, unsigned int B_cols); + +void DenseMatrixInversion(ComplexType* d_Anv, ComplexType* d_A, + unsigned int A_rows, unsigned int A_cols); + +void ConjugateTranspose(ComplexType* d_AconjT, const ComplexType* d_A, + unsigned int A_rows, unsigned int A_cols); + +void DenseMatrixEigendecomposition(ComplexType* d_U, ComplexType* d_Lambda, + unsigned int A_rows, unsigned int A_cols); + +} // namespace MathLib diff --git a/Source/Utils/CodeUtils/MathLib.cpp b/Source/Utils/CodeUtils/MathLib.cpp new file mode 100644 index 0000000..0692400 --- /dev/null +++ b/Source/Utils/CodeUtils/MathLib.cpp @@ -0,0 +1,195 @@ +#include + +#ifdef AMREX_USE_CUDA +#include //CUBLAS +#include //CUDA runtime +#include //CUSOLVER +#endif + +#include "MathLib.H" +#include "cudaErrorCheck.H" + +void MathLib::MatrixMatrixMultiply(ComplexType* d_C, const ComplexType* d_A, + const ComplexType* d_B, unsigned int A_rows, + unsigned int A_cols, unsigned int B_cols) +{ +#ifdef AMREX_USE_GPU +#ifdef AMREX_USE_CUDA + cublasHandle_t handle; + const cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0); + const cuDoubleComplex beta = make_cuDoubleComplex(0.0, 0.0); + checkCudaErrors(cublasCreate(&handle)); + + // Perform matrix multiplication: C = alpha * A * B + beta * C + // see: https://docs.nvidia.com/cuda/archive/10.0/cublas/index.html + cublasStatus_t status = + cublasZgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, A_rows, B_cols, A_cols, + &alpha, reinterpret_cast(d_A), + A_rows, reinterpret_cast(d_B), + A_cols, &beta, reinterpret_cast(d_C), + A_rows); + checkCudaErrors(status); + + checkCudaErrors(cublasDestroy(handle)); +#elif AMREX_USE_HIP + /*HIP version*/ +#elif AMREX_USE_SYCL + /*SYCL version*/ +#endif +#else + /*LAPACK version*/ +#endif +} + +void MathLib::DenseMatrixInversion(ComplexType* d_Ainv, ComplexType* d_A, + unsigned int A_rows, unsigned int A_cols) +{ +#ifdef AMREX_USE_GPU +#ifdef AMREX_USE_CUDA + cublasHandle_t cublasH = nullptr; + cusolverDnHandle_t cusolverH = nullptr; + + checkCudaErrors(cusolverDnCreate(&cusolverH)); + checkCudaErrors(cublasCreate(&cublasH)); + + int* d_info = nullptr; + checkCudaErrors(cudaMalloc(&d_info, sizeof(int))); + + int* d_pivot = nullptr; + checkCudaErrors(cudaMalloc(&d_pivot, A_rows * sizeof(int))); + + int lwork = 0; + cuDoubleComplex* d_work = nullptr; + + // Compute A_inverse from A using LU decomposition. + checkCudaErrors( + cusolverDnZgetrf_bufferSize(cusolverH, A_rows, A_cols, + reinterpret_cast(d_A), + A_rows, &lwork)); + + checkCudaErrors(cudaMalloc(&d_work, lwork * sizeof(cuDoubleComplex))); + + // Perform LU decomposition using cuSolver. + // P A = L U (P is permutation matrix) + // see: https://docs.nvidia.com/cuda/cusolver/index.html + checkCudaErrors(cusolverDnZgetrf(cusolverH, A_rows, A_cols, + reinterpret_cast(d_A), + A_rows, d_work, d_pivot, d_info)); + + // Check if LU decomposition was successful + int info = 0; + checkCudaErrors( + cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost)); + if (info != 0) + { + std::cerr << "LU decomposition failed with info = " << info << "\n"; + cudaDeviceReset(); + exit(1); + } + + // Solve for Ainv using eq: A * A_inv = I, where A is factorized. + // op(A) * X = B + checkCudaErrors( + cusolverDnZgetrs(cusolverH, CUBLAS_OP_N, A_rows, A_rows, + reinterpret_cast(d_A), A_rows, + d_pivot, reinterpret_cast(d_Ainv), + A_rows, d_info)); + + checkCudaErrors(cudaFree(d_info)); + checkCudaErrors(cudaFree(d_pivot)); + checkCudaErrors(cudaFree(d_work)); + + checkCudaErrors(cusolverDnDestroy(cusolverH)); + checkCudaErrors(cublasDestroy(cublasH)); + +#elif AMREX_USE_HIP + /*HIP version*/ +#elif AMREX_USE_SYCL + /*SYCL version*/ +#endif +#else + /*LAPACK version*/ +#endif +} + +void MathLib::ConjugateTranspose(ComplexType* d_AconjT, const ComplexType* d_A, + unsigned int A_rows, unsigned int A_cols) +{ +#ifdef AMREX_USE_GPU +#ifdef AMREX_USE_CUDA + cublasHandle_t handle; + const cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0); + const cuDoubleComplex beta = make_cuDoubleComplex(0.0, 0.0); + checkCudaErrors(cublasCreate(&handle)); + + // Perform matrix conjugate transpose: d_AconjT = conj(d_A) + // https://docs.nvidia.com/cuda/cublas/ + cublasStatus_t status = + cublasZgeam(handle, CUBLAS_OP_C, CUBLAS_OP_N, A_cols, A_rows, &alpha, + reinterpret_cast(d_A), A_rows, + &beta, reinterpret_cast(d_A), + A_rows, reinterpret_cast(d_AconjT), + A_cols); + checkCudaErrors(status); + + checkCudaErrors(cublasDestroy(handle)); +#elif AMREX_USE_HIP + /*HIP version*/ +#elif AMREX_USE_SYCL + /*SYCL version*/ +#endif +#else + /*LAPACK version*/ +#endif +} + +void MathLib::DenseMatrixEigendecomposition(ComplexType* d_U, + ComplexType* d_Lambda, + unsigned int A_rows, + unsigned int A_cols) +{ +#ifdef AMREX_USE_GPU +#ifdef AMREX_USE_CUDA + cusolverDnHandle_t cusolverH; + checkCudaErrors(cusolverDnCreate(&cusolverH)); + + // Copy the matrix to be decomposed, say A, into d_U before calling this. + // so d_U will hold the eigenvectors upon completion + + // Workspace and info setup for cuSolver + int work_size = 0; + int* devInfo; + checkCudaErrors(cudaMalloc((void**)&devInfo, sizeof(int))); + + //// Query workspace size for eigendecomposition of a complex Hermitian + /// matrix + // checkCudaErrors(cusolverDnZheevd_bufferSize( + // cusolverH, CUBLAS_FILL_MODE_UPPER, A_rows, + // reinterpret_cast(d_U), A_rows, + // reinterpret_cast(d_Lambda), &work_size)); + + //// Allocate workspace + // ComplexType* d_work; + // checkCudaErrors(cudaMalloc((void**)&d_work, work_size * + // sizeof(ComplexType))); + + //// Perform the eigendecomposition + // checkCudaErrors(cusolverDnZheevd( + // cusolverH, CUBLAS_FILL_MODE_UPPER, A_rows, + // reinterpret_cast(d_U), A_rows, + // reinterpret_cast(d_Lambda), + // reinterpret_cast(d_work), work_size, devInfo)); + + // Clean up + // checkCudaErrors(cudaFree(d_work)); + // checkCudaErrors(cudaFree(devInfo)); + checkCudaErrors(cusolverDnDestroy(cusolverH)); +#elif AMREX_USE_HIP + /*HIP version*/ +#elif AMREX_USE_SYCL + /*SYCL version*/ +#endif +#else + /*LAPACK version*/ +#endif +} diff --git a/Source/Utils/CodeUtils/cudaErrorCheck.H b/Source/Utils/CodeUtils/cudaErrorCheck.H new file mode 100644 index 0000000..872bac3 --- /dev/null +++ b/Source/Utils/CodeUtils/cudaErrorCheck.H @@ -0,0 +1,18 @@ +//////////////////////////////////////////////////////////////////////////////// +// CUDA error checking +//////////////////////////////////////////////////////////////////////////////// +#define checkCudaErrors(val) \ + __checkCudaErrors__((val), #val, __FILE__, __LINE__) + +template +inline void __checkCudaErrors__(T code, const char *func, const char *file, + int line) +{ + if (code) + { + fprintf(stderr, "CUDA error at %s:%d code=%d \"%s\" \n", file, line, + (unsigned int)code, func); + cudaDeviceReset(); + exit(EXIT_FAILURE); + } +} diff --git a/scripts/negf_cpp/decimation_technique/Source/main.cpp b/scripts/negf_cpp/decimation_technique/Source/main.cpp index 616e835..7c4ab34 100644 --- a/scripts/negf_cpp/decimation_technique/Source/main.cpp +++ b/scripts/negf_cpp/decimation_technique/Source/main.cpp @@ -102,10 +102,12 @@ void CondenseHamiltonian(CondensedHamiltonianElem& cond, MatrixDType E, cond.Pi = gamma * beta * H_tilde_11; - // amrex::Print() << "E: " << E << "\n"; - // amrex::Print() << "Xi_s: " << Xi_s << "\n"; - // amrex::Print() << "Xi: " << Xi << "\n"; - // amrex::Print() << "Pi: " << Pi << "\n"; + amrex::Print() << "E: " << E << "\n"; + amrex::Print() << "U: " << U << "\n"; + amrex::Print() << "EmU: " << E-U << "\n"; + amrex::Print() << "Xi_s: " << cond.Xi_s << "\n"; + amrex::Print() << "Xi: " << cond.Xi << "\n"; + amrex::Print() << "Pi: " << cond.Pi << "\n"; } AMREX_GPU_HOST_DEVICE From e376692d36be20addfd4bcf953e0e8268c2dca3a Mon Sep 17 00:00:00 2001 From: "saru@southpole" Date: Tue, 19 Nov 2024 21:07:48 -0800 Subject: [PATCH 2/4] minor formatting modification --- Source/Solver/Transport/NEGF/NEGF_Common.H | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Source/Solver/Transport/NEGF/NEGF_Common.H b/Source/Solver/Transport/NEGF/NEGF_Common.H index 059c2a4..48a0b7c 100644 --- a/Source/Solver/Transport/NEGF/NEGF_Common.H +++ b/Source/Solver/Transport/NEGF/NEGF_Common.H @@ -468,11 +468,10 @@ class c_NEGF_Common : protected MatrixBlock, private c_IntegrationPath #endif /*Decimation Technique*/ - int use_decimation = - false; // whether to use decimation technique in derived class. - // if Compute_SurfaceGreensFunction method is not overriden - // then the default is decimation regardless of what we set - // use_decimation as. + int use_decimation = false; + // whether to use decimation technique in the derived class. + // derived class default is false. + // base class default is true. int decimation_max_iter = 30; // maximum iterations allowed int decimation_rel_error = 1.e-5; // relative error From 2b2cdc90c4f4ba94e3bceec3ce93c795ad4e4a36 Mon Sep 17 00:00:00 2001 From: "saru@southpole" Date: Fri, 22 Nov 2024 17:12:45 -0800 Subject: [PATCH 3/4] tested decimation --- Source/Solver/Transport/NEGF/NEGF_Common.cpp | 7 +++++++ input/negf/StewartFrancois2006/all_around_metal | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/Source/Solver/Transport/NEGF/NEGF_Common.cpp b/Source/Solver/Transport/NEGF/NEGF_Common.cpp index 2dcfa9f..6c6e6e9 100644 --- a/Source/Solver/Transport/NEGF/NEGF_Common.cpp +++ b/Source/Solver/Transport/NEGF/NEGF_Common.cpp @@ -602,6 +602,13 @@ void c_NEGF_Common::Read_DecimationTechniqueParams(amrex::ParmParse &pp_ns) queryWithParser(pp_ns, "decimation_max_iter", decimation_max_iter); queryWithParser(pp_ns, "decimation_rel_error", decimation_rel_error); queryWithParser(pp_ns, "decimation_layers", decimation_layers); + + amrex::Print() << "##### use_decimation: " << use_decimation << "\n"; + amrex::Print() << "##### decimation_max_iter: " << decimation_max_iter + << "\n"; + amrex::Print() << "##### decimation_rel_error: " << decimation_rel_error + << "\n"; + amrex::Print() << "##### decimation_layers: " << decimation_layers << "\n"; } template diff --git a/input/negf/StewartFrancois2006/all_around_metal b/input/negf/StewartFrancois2006/all_around_metal index 605c8bb..bb4ebd9 100644 --- a/input/negf/StewartFrancois2006/all_around_metal +++ b/input/negf/StewartFrancois2006/all_around_metal @@ -210,4 +210,10 @@ cnt.eq_integration_pts = 30 30 30 cnt.noneq_integration_pts = 100 cnt.write_at_iter = 1 + +### decimation technique +cnt.use_decimation=1 +cnt.decimation_max_iter=50 +cnt.decimation_rel_error=1e-5 +cnt.decimation_layers=20 ##################################### From 65a925ee1125eb395406a9aa38201f8b1c2b3663 Mon Sep 17 00:00:00 2001 From: "saru@southpole" Date: Sun, 1 Dec 2024 16:56:07 -0800 Subject: [PATCH 4/4] verified decimation technique implementation --- input/negf/StewartFrancois2006/all_around_metal | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/input/negf/StewartFrancois2006/all_around_metal b/input/negf/StewartFrancois2006/all_around_metal index bb4ebd9..2bc5985 100644 --- a/input/negf/StewartFrancois2006/all_around_metal +++ b/input/negf/StewartFrancois2006/all_around_metal @@ -212,8 +212,8 @@ cnt.noneq_integration_pts = 100 cnt.write_at_iter = 1 ### decimation technique -cnt.use_decimation=1 -cnt.decimation_max_iter=50 -cnt.decimation_rel_error=1e-5 -cnt.decimation_layers=20 +#cnt.use_decimation=1 +#cnt.decimation_max_iter=50 +#cnt.decimation_rel_error=1e-5 +#cnt.decimation_layers=100 #####################################