Skip to content

Commit

Permalink
Merge pull request #178 from jtkrogel/offload_spline_v
Browse files Browse the repository at this point in the history
Add spline evaluate_v offload
  • Loading branch information
ye-luo authored Sep 13, 2018
2 parents 15e8597 + 2370e5f commit bd7e78a
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 2 deletions.
50 changes: 50 additions & 0 deletions src/Numerics/Spline2/MultiBsplineOffload.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ template <typename T> struct MultiBsplineOffload
static void evaluate_v(const spliner_type *restrict spline_m, T x, T y, T z, T *restrict vals,
size_t num_splines);

/** Value-only spline evaluation at the point (x, y, z).
 *
 * Writes one result per spline into vals[0 .. num_splines).
 *
 * When ENABLE_OFFLOAD is defined, the loop over splines in the definition is
 * an `omp for nowait` worksharing loop, so the call is meant to be issued
 * from inside an enclosing `omp parallel` region. Otherwise the loop is an
 * `omp simd aligned(vals)` loop.
 *
 * @param spline_m    spline object providing grids, strides and coefficients
 * @param x,y,z       evaluation point; each grid's `start` is subtracted first
 * @param vals        output array with at least num_splines entries
 * @param num_splines number of splines to evaluate
 */
static void evaluate_v_v2(const spliner_type *restrict spline_m, T x, T y, T z, T *restrict vals,
size_t num_splines);

static void evaluate_vgl(const spliner_type *restrict spline_m, T x, T y, T z, T *restrict vals, T *restrict grads,
T *restrict lapl, size_t num_splines);

Expand Down Expand Up @@ -100,6 +103,53 @@ inline void MultiBsplineOffload<T>::evaluate_v(const spliner_type *restrict spli
}
}

/** Value-only evaluation of every spline in the set at a single point.
 *
 * For each spline index n < num_splines, a 4x4x4 block of coefficients is
 * contracted with the per-axis prefactors and the result stored in vals[n].
 *
 * With ENABLE_OFFLOAD the loop over n is an orphaned `omp for nowait`
 * worksharing loop, so the caller is expected to have opened the enclosing
 * `omp parallel` region. Without it the loop is annotated
 * `omp simd aligned(vals)`.
 *
 * @param spline_m    spline object (grids, strides, coefficient table)
 * @param x,y,z       evaluation point, taken by value and shifted locally
 * @param vals        output, at least num_splines entries
 * @param num_splines number of splines to evaluate
 */
template <typename T>
inline void MultiBsplineOffload<T>::evaluate_v_v2(const spliner_type *restrict spline_m,
                                                  T x, T y, T z, T *restrict vals,
                                                  size_t num_splines)
{
  // Measure each coordinate from the origin of its own grid.
  x -= spline_m->x_grid.start;
  y -= spline_m->y_grid.start;
  z -= spline_m->z_grid.start;

  // SplineBound::get fills a T and an int per axis from the scaled coordinate
  // (presumably the in-cell fraction and the cell index, bounded by num - 1;
  // confirm against SplineBound).
  T frac_x, frac_y, frac_z;
  int cell_x, cell_y, cell_z;
  SplineBound<T>::get(x * spline_m->x_grid.delta_inv, frac_x, cell_x, spline_m->x_grid.num - 1);
  SplineBound<T>::get(y * spline_m->y_grid.delta_inv, frac_y, cell_y, spline_m->y_grid.num - 1);
  SplineBound<T>::get(z * spline_m->z_grid.delta_inv, frac_z, cell_z, spline_m->z_grid.num - 1);

  // Four prefactors per axis, computed from the per-axis T values above.
  T wx[4], wy[4], wz[4];
  MultiBsplineData<T>::compute_prefactors(wx, frac_x);
  MultiBsplineData<T>::compute_prefactors(wy, frac_y);
  MultiBsplineData<T>::compute_prefactors(wz, frac_z);

  const intptr_t stride_x = spline_m->x_stride;
  const intptr_t stride_y = spline_m->y_stride;
  const intptr_t stride_z = spline_m->z_stride;

#ifdef ENABLE_OFFLOAD
#pragma omp for nowait
#else
#pragma omp simd aligned(vals)
#endif
  for (size_t n = 0; n < num_splines; ++n)
  {
    T sum = T();
    for (size_t i = 0; i < 4; ++i)
    {
      for (size_t j = 0; j < 4; ++j)
      {
        // Start of the z-line of coefficients for this (x, y) offset.
        const T *restrict line =
            spline_m->coefs + (cell_x + i) * stride_x + (cell_y + j) * stride_y + cell_z * stride_z;
        // Contract the four z neighbours first, then weight by the x and y prefactors.
        const T along_z = wz[0] * line[n] + wz[1] * line[n + stride_z] +
                          wz[2] * line[n + 2 * stride_z] + wz[3] * line[n + 3 * stride_z];
        sum += wx[i] * wy[j] * along_z;
      }
    }
    vals[n] = sum;
  }
}

template <typename T>
inline void
MultiBsplineOffload<T>::evaluate_vgl(const spliner_type *restrict spline_m,
Expand Down
41 changes: 39 additions & 2 deletions src/QMCWaveFunctions/einspline_spo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,46 @@ struct einspline_spo : public SPOSet
{
ScopedTimer local_timer(timer);

auto u = Lattice.toUnit_floor(p);
if (nBlocks != psi_shadows.size())
{
psi_shadows.resize(nBlocks);

T** restrict psi_shadows_ptr = psi_shadows.data();
for (int i = 0; i < nBlocks; ++i)
{
T* restrict psi_ptr = psi[i].data();
#ifdef ENABLE_OFFLOAD
#pragma omp target map(to : i) device(0)
#endif
{
psi_shadows_ptr[i] = psi_ptr;
}
}
}

OMPTinyVector<T, 3> u = Lattice.toUnit_floor(p);

T** restrict psi_shadows_ptr = psi_shadows.data();
spline_type** restrict einsplines_ptr = einsplines.data();

#ifdef ENABLE_OFFLOAD
#pragma omp target teams distribute num_teams(nBlocks) device(0) \
map(to : nBlocks, nSplinesPerBlock) map(always, to : u)
#else
#pragma omp parallel for
#endif
for (int i = 0; i < nBlocks; ++i)
compute_engine.evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock);
{
#ifdef ENABLE_OFFLOAD
#pragma omp parallel num_threads(nSplinesPerBlock)
#endif
MultiBsplineOffload<T>::evaluate_v_v2(einsplines_ptr[i],
u[0],
u[1],
u[2],
psi_shadows_ptr[i],
nSplinesPerBlock);
}
}

/** evaluate psi */
Expand Down

0 comments on commit bd7e78a

Please sign in to comment.