Skip to content

Commit

Permalink
no merge in hot loop
Browse files Browse the repository at this point in the history
results in measureable run time decrease for Intel and GNU compilers
  • Loading branch information
MarDiehl committed Oct 5, 2023
1 parent 05e675b commit 0e353d9
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 9 deletions.
7 changes: 4 additions & 3 deletions src/phase_mechanical_plastic_dislotungsten.f90
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ pure module subroutine dislotungsten_LpAndItsTangent(Lp,dLp_dMp, &
T !< temperature
real(pREAL), dimension(param(ph)%sum_N_sl) :: &
dot_gamma, ddot_gamma_dtau
real(pREAL), dimension(3,3,param(ph)%sum_N_sl) :: &
P_nS


T = thermal_T(ph,en)
Expand All @@ -304,13 +306,12 @@ pure module subroutine dislotungsten_LpAndItsTangent(Lp,dLp_dMp, &
associate(prm => param(ph))

call kinetics(Mp,T,ph,en, dot_gamma,ddot_gamma_dtau)
P_nS = merge(prm%P_nS_pos,prm%P_nS_neg, spread(spread(dot_gamma,1,3),2,3)>0.0_pREAL) ! faster than 'merge' in loop
do i = 1, prm%sum_N_sl
Lp = Lp + dot_gamma(i)*prm%P_sl(1:3,1:3,i)
forall (k=1:3,l=1:3,m=1:3,n=1:3) &
dLp_dMp(k,l,m,n) = dLp_dMp(k,l,m,n) &
+ ddot_gamma_dtau(i) * prm%P_sl(k,l,i) &
* merge(prm%P_nS_pos(m,n,i), &
prm%P_nS_neg(m,n,i), dot_gamma(i)>0.0_pREAL)
+ ddot_gamma_dtau(i) * prm%P_sl(k,l,i) * P_nS(m,n,i)
end do

end associate
Expand Down
7 changes: 4 additions & 3 deletions src/phase_mechanical_plastic_kinehardening.f90
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,8 @@ pure module subroutine kinehardening_LpAndItsTangent(Lp,dLp_dMp, Mp,ph,en)
i,k,l,m,n
real(pREAL), dimension(param(ph)%sum_N_sl) :: &
dot_gamma, ddot_gamma_dtau
real(pREAL), dimension(3,3,param(ph)%sum_N_sl) :: &
P_nS


Lp = 0.0_pREAL
Expand All @@ -280,13 +282,12 @@ pure module subroutine kinehardening_LpAndItsTangent(Lp,dLp_dMp, Mp,ph,en)
associate(prm => param(ph))

call kinetics(Mp,ph,en, dot_gamma,ddot_gamma_dtau)
P_nS = merge(prm%P_nS_pos,prm%P_nS_neg, spread(spread(dot_gamma,1,3),2,3)>0.0_pREAL) ! faster than 'merge' in loop
do i = 1, prm%sum_N_sl
Lp = Lp + dot_gamma(i)*prm%P(1:3,1:3,i)
forall (k=1:3,l=1:3,m=1:3,n=1:3) &
dLp_dMp(k,l,m,n) = dLp_dMp(k,l,m,n) &
+ ddot_gamma_dtau(i) * prm%P(k,l,i) &
* merge(prm%P_nS_pos(m,n,i), &
prm%P_nS_neg(m,n,i), dot_gamma(i)>0.0_pREAL)
+ ddot_gamma_dtau(i) * prm%P(k,l,i) * P_nS(m,n,i)
end do

end associate
Expand Down
7 changes: 4 additions & 3 deletions src/phase_mechanical_plastic_phenopowerlaw.f90
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,8 @@ pure module subroutine phenopowerlaw_LpAndItsTangent(Lp,dLp_dMp,Mp,ph,en)
i,k,l,m,n
real(pREAL), dimension(param(ph)%sum_N_sl) :: &
dot_gamma_sl,ddot_gamma_dtau_sl
real(pREAL), dimension(3,3,param(ph)%sum_N_sl) :: &
P_nS
real(pREAL), dimension(param(ph)%sum_N_tw) :: &
dot_gamma_tw,ddot_gamma_dtau_tw

Expand All @@ -322,13 +324,12 @@ pure module subroutine phenopowerlaw_LpAndItsTangent(Lp,dLp_dMp,Mp,ph,en)
associate(prm => param(ph))

call kinetics_sl(Mp,ph,en,dot_gamma_sl,ddot_gamma_dtau_sl)
P_nS = merge(prm%P_nS_pos,prm%P_nS_neg, spread(spread(dot_gamma_sl,1,3),2,3)>0.0_pREAL) ! faster than 'merge' in loop
slipSystems: do i = 1, prm%sum_N_sl
Lp = Lp + dot_gamma_sl(i)*prm%P_sl(1:3,1:3,i)
forall (k=1:3,l=1:3,m=1:3,n=1:3) &
dLp_dMp(k,l,m,n) = dLp_dMp(k,l,m,n) &
+ ddot_gamma_dtau_sl(i) * prm%P_sl(k,l,i) &
* merge(prm%P_nS_pos(m,n,i), &
prm%P_nS_neg(m,n,i), dot_gamma_sl(i)>0.0_pREAL)
+ ddot_gamma_dtau_sl(i) * prm%P_sl(k,l,i) * P_nS(m,n,i)
end do slipSystems

call kinetics_tw(Mp,ph,en,dot_gamma_tw,ddot_gamma_dtau_tw)
Expand Down

0 comments on commit 0e353d9

Please sign in to comment.