Skip to content

Commit

Permalink
clean up deposition using ParallelFor with compile time optimizations (
Browse files Browse the repository at this point in the history
…#819)

* clean up deposition using ParallelFor with compile time optimizations

* fix whitespace

* remove k_cell

* fix invvol
  • Loading branch information
AlexanderSinn authored Nov 24, 2022
1 parent 8f477da commit 10c3366
Show file tree
Hide file tree
Showing 12 changed files with 585 additions and 899 deletions.
2 changes: 2 additions & 0 deletions src/laser/Laser.H
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,13 @@ namespace LaserFFT {
#else
# ifdef AMREX_USE_FLOAT
using VendorFFT = fftwf_plan;
using FFTWComplex = fftwf_complex;
const auto VendorCreate = fftwf_plan_dft_2d;
const auto VendorExecute = fftwf_execute;
const auto VendorDestroy = fftwf_destroy_plan;
# else
using VendorFFT = fftw_plan;
using FFTWComplex = fftw_complex;
const auto VendorCreate = fftw_plan_dft_2d;
const auto VendorExecute = fftw_execute;
const auto VendorDestroy = fftw_destroy_plan;
Expand Down
12 changes: 6 additions & 6 deletions src/laser/Laser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,14 @@ Laser::InitData (const amrex::BoxArray& slice_ba,
// Forward FFT plan
m_plan_fwd = LaserFFT::VendorCreate(
fft_size[1], fft_size[0],
reinterpret_cast<fftw_complex*>(m_rhs.dataPtr()),
reinterpret_cast<fftw_complex*>(m_rhs_fourier.dataPtr()),
reinterpret_cast<LaserFFT::FFTWComplex*>(m_rhs.dataPtr()),
reinterpret_cast<LaserFFT::FFTWComplex*>(m_rhs_fourier.dataPtr()),
FFTW_FORWARD, FFTW_ESTIMATE);
// Backward FFT plan
m_plan_bkw = LaserFFT::VendorCreate(
fft_size[1], fft_size[0],
reinterpret_cast<fftw_complex*>(m_rhs_fourier.dataPtr()),
reinterpret_cast<fftw_complex*>(m_sol.dataPtr()),
reinterpret_cast<LaserFFT::FFTWComplex*>(m_rhs_fourier.dataPtr()),
reinterpret_cast<LaserFFT::FFTWComplex*>(m_sol.dataPtr()),
FFTW_BACKWARD, FFTW_ESTIMATE);
#endif
}
Expand Down Expand Up @@ -267,8 +267,8 @@ Laser::AdvanceSliceMG (const Fields& fields, const amrex::Geometry& geom, amrex:

amrex::FArrayBox rhs_mg;
amrex::FArrayBox acoeff_real;
amrex::Real acoeff_real_scalar;
amrex::Real acoeff_imag_scalar;
amrex::Real acoeff_real_scalar = 0._rt;
amrex::Real acoeff_imag_scalar = 0._rt;

amrex::Real djn {0.};

Expand Down
170 changes: 135 additions & 35 deletions src/particles/deposition/BeamDepositCurrent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
*/
#include "BeamDepositCurrent.H"
#include "particles/beam/BeamParticleContainer.H"
#include "BeamDepositCurrentInner.H"
#include "particles/particles_utils/ShapeFactors.H"
#include "fields/Fields.H"
#include "utils/Constants.H"
#include "Hipace.H"
#include "utils/GPUUtil.H"
#include "utils/HipaceProfilerWrapper.H"
#include "Hipace.H"

#include <AMReX_DenseBins.H>

Expand All @@ -26,6 +27,9 @@ DepositCurrentSlice (BeamParticleContainer& beam, Fields& fields,
const int which_slice, int nghost)
{
HIPACE_PROFILE("DepositCurrentSlice_BeamParticleContainer()");

using namespace amrex::literals;

// Extract properties associated with physical size of the box
amrex::Real const * AMREX_RESTRICT dx = gm[lev].CellSize();

Expand All @@ -41,18 +45,19 @@ DepositCurrentSlice (BeamParticleContainer& beam, Fields& fields,
AMREX_ALWAYS_ASSERT_WITH_MESSAGE(Hipace::m_depos_order_z == 0,
"Only order 0 deposition is allowed for beam per-slice deposition");

const bool explicit_solve = Hipace::GetInstance().m_explicit;

// Extract the fields currents
// Extract FabArray for this box (because there is currently no transverse
// parallelization, the index we want in the slice multifab is always 0.
// Fix later.
amrex::FArrayBox& isl_fab = fields.getSlices(lev, which_slice)[0];
AMREX_ALWAYS_ASSERT_WITH_MESSAGE(isl_fab.box().ixType().cellCentered(),
"jx, jy, jz and rho must be nodal in all directions.");

// we deposit to the beam currents, because the explicit solver
// requires sometimes just the beam currents
// Do not access the field if the kernel later does not deposit into it,
// the field might not be allocated. Use 0 as dummy component instead
const std::string beam_str = explicit_solve ? "_beam" : "";
// the field might not be allocated. Use -1 as dummy component instead
const std::string beam_str = Hipace::GetInstance().m_explicit ? "_beam" : "";
const int jxb_cmp = do_beam_jx_jy_deposition ? Comps[which_slice]["jx" +beam_str] : -1;
const int jyb_cmp = do_beam_jx_jy_deposition ? Comps[which_slice]["jy" +beam_str] : -1;
const int jzb_cmp = do_beam_jz_deposition ? Comps[which_slice]["jz" +beam_str] : -1;
Expand All @@ -61,38 +66,133 @@ DepositCurrentSlice (BeamParticleContainer& beam, Fields& fields,
// Offset for converting positions to indexes
amrex::Real const x_pos_offset = GetPosOffset(0, gm[lev], isl_fab.box());
amrex::Real const y_pos_offset = GetPosOffset(1, gm[lev], isl_fab.box());
amrex::Real const z_pos_offset = GetPosOffset(2, gm[lev], isl_fab.box());

amrex::Real lev_weight_fac = 1.;
if (lev == 1 && Hipace::m_normalized_units) {
// re-scaling the weight in normalized units to get the same charge density on lev 1
// Not necessary in SI units, there the weight is the actual charge and not the density
amrex::Real const * AMREX_RESTRICT dx_lev0 = gm[0].CellSize();
lev_weight_fac = dx_lev0[0] * dx_lev0[1] * dx_lev0[2] / (dx[0] * dx[1] * dx[2]);

// Whether the ghost slice has to be deposited
const bool deposit_ghost = ((which_slice==WhichSlice::Next) && (islice == 0));
if (deposit_ghost && !do_beam_jx_jy_deposition) return;

// Ghost particles are indexed [beam.numParticles()-nghost, beam.numParticles()-1]
int box_offset = offset;
if (deposit_ghost) box_offset = beam.numParticles()-nghost;

PhysConst const phys_const = get_phys_const();

// Extract particle properties
const auto& aos = beam.GetArrayOfStructs(); // For positions
const auto& pos_structs = aos.begin() + box_offset;
const auto& soa = beam.GetStructOfArrays(); // For momenta and weights
const auto wp = soa.GetRealData(BeamIdx::w).data() + box_offset;
const auto uxp = soa.GetRealData(BeamIdx::ux).data() + box_offset;
const auto uyp = soa.GetRealData(BeamIdx::uy).data() + box_offset;
const auto uzp = soa.GetRealData(BeamIdx::uz).data() + box_offset;

// Extract box properties
const amrex::Real dxi = 1.0/dx[0];
const amrex::Real dyi = 1.0/dx[1];
const amrex::Real dzi = 1.0/dx[2];
amrex::Real invvol = dxi * dyi * dzi;

if (Hipace::m_normalized_units) {
if (lev == 0) {
invvol = 1._rt;
} else {
// re-scaling the weight in normalized units to get the same charge density on lev 1
// Not necessary in SI units, there the weight is the actual charge and not the density
amrex::Real const * AMREX_RESTRICT dx_lev0 = gm[0].CellSize();
invvol = dx_lev0[0] * dx_lev0[1] * dx_lev0[2] * dxi * dyi * dzi;
}
}

// For now: fix the value of the charge
const amrex::Real q = beam.m_charge * lev_weight_fac;

// Call deposition function in each box
if (Hipace::m_depos_order_xy == 0){
doDepositionShapeN<0, 0>( beam, isl_fab, jxb_cmp, jyb_cmp, jzb_cmp, rhob_cmp, dx,
x_pos_offset, y_pos_offset, z_pos_offset, q, islice, bins, offset,
do_beam_jx_jy_deposition, which_slice, nghost);
} else if (Hipace::m_depos_order_xy == 1){
doDepositionShapeN<1, 0>( beam, isl_fab, jxb_cmp, jyb_cmp, jzb_cmp, rhob_cmp, dx,
x_pos_offset, y_pos_offset, z_pos_offset, q, islice, bins, offset,
do_beam_jx_jy_deposition, which_slice, nghost);
} else if (Hipace::m_depos_order_xy == 2){
doDepositionShapeN<2, 0>( beam, isl_fab, jxb_cmp, jyb_cmp, jzb_cmp, rhob_cmp, dx,
x_pos_offset, y_pos_offset, z_pos_offset, q, islice, bins, offset,
do_beam_jx_jy_deposition, which_slice, nghost);
} else if (Hipace::m_depos_order_xy == 3){
doDepositionShapeN<3, 0>( beam, isl_fab, jxb_cmp, jyb_cmp, jzb_cmp, rhob_cmp, dx,
x_pos_offset, y_pos_offset, z_pos_offset, q, islice, bins, offset,
do_beam_jx_jy_deposition, which_slice, nghost);
const amrex::Real clightsq = 1.0_rt/(phys_const.c*phys_const.c);
const amrex::Real q = beam.m_charge;

Array3<amrex::Real> const isl_arr = isl_fab.array();

BeamBins::index_type const * const indices = bins.permutationPtr();
BeamBins::index_type const * const offsets = bins.offsetsPtrCpu();
BeamBins::index_type cell_start = 0;
BeamBins::index_type cell_stop = 0;

// The particles that are in slice islice are
// given by the indices[cell_start:cell_stop]
if (which_slice == WhichSlice::This || which_slice == WhichSlice::Salame) {
cell_start = offsets[islice];
cell_stop = offsets[islice+1];
} else {
amrex::Abort("unknown deposition order");
if (islice > 0) {
cell_start = offsets[islice-1];
cell_stop = offsets[islice];
} else {
cell_start = 0;
cell_stop = nghost;
}
}
int const num_particles = cell_stop-cell_start;

// Loop over particles and deposit into jx_fab, jy_fab, and jz_fab
amrex::ParallelFor(
amrex::TypeList<amrex::CompileTimeOptions<0, 1, 2, 3>>{},
{Hipace::m_depos_order_xy},
num_particles,
[=] AMREX_GPU_DEVICE (long idx, auto depos_order) {
constexpr int depos_order_xy = depos_order.value;

// Particles in the same slice must be accessed through the bin sorter.
// Ghost particles are simply contiguous in memory.
const int ip = deposit_ghost ? cell_start+idx : indices[cell_start+idx];

// Skip invalid particles and ghost particles not in the last slice
if (pos_structs[ip].id() < 0) return;
// --- Get particle quantities
const amrex::Real gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
+ uyp[ip]*uyp[ip]*clightsq
+ uzp[ip]*uzp[ip]*clightsq);
const amrex::Real wq = q*wp[ip]*invvol;

const amrex::Real vx = uxp[ip]*gaminv;
const amrex::Real vy = uyp[ip]*gaminv;
const amrex::Real vz = uzp[ip]*gaminv;
// wqx, wqy wqz are particle current in each direction
const amrex::Real wqx = wq*vx;
const amrex::Real wqy = wq*vy;
const amrex::Real wqz = wq*vz;

// --- Compute shape factors
// x direction
const amrex::Real xmid = (pos_structs[ip].pos(0) - x_pos_offset)*dxi;
// i_cell leftmost cell in x that the particle touches. sx_cell shape factor along x
amrex::Real sx_cell[depos_order_xy + 1];
const int i_cell = compute_shape_factor<depos_order_xy>(sx_cell, xmid);

// y direction
const amrex::Real ymid = (pos_structs[ip].pos(1) - y_pos_offset)*dyi;
amrex::Real sy_cell[depos_order_xy + 1];
const int j_cell = compute_shape_factor<depos_order_xy>(sy_cell, ymid);

// Deposit current into jx, jy, jz, rho
for (int iy=0; iy<=depos_order_xy; iy++){
for (int ix=0; ix<=depos_order_xy; ix++){
if (jxb_cmp != -1) { // do_beam_jx_jy_deposition
amrex::Gpu::Atomic::Add(
isl_arr.ptr(i_cell+ix, j_cell+iy, jxb_cmp),
sx_cell[ix]*sy_cell[iy]*wqx);
amrex::Gpu::Atomic::Add(
isl_arr.ptr(i_cell+ix, j_cell+iy, jyb_cmp),
sx_cell[ix]*sy_cell[iy]*wqy);
}
if (jzb_cmp != -1) { // do_beam_jz_deposition
amrex::Gpu::Atomic::Add(
isl_arr.ptr(i_cell+ix, j_cell+iy, jzb_cmp),
sx_cell[ix]*sy_cell[iy]*wqz);
}
if (rhob_cmp != -1) { // do_beam_rho_deposition
amrex::Gpu::Atomic::Add(
isl_arr.ptr(i_cell+ix, j_cell+iy, rhob_cmp),
sx_cell[ix]*sy_cell[iy]*wq);
}
}
}
}
);

}
Loading

0 comments on commit 10c3366

Please sign in to comment.