Add derivative shape factors (#831)

* add derivative shape factor * fix factor of -1
Hi-PACE · Dec 2, 2022 · a5ab0cc · a5ab0cc
1 parent 56404b1
commit a5ab0cc
Show file tree

Hide file tree

Showing 6 changed files with 349 additions and 87 deletions.
diff --git a/docs/source/run/parameters.rst b/docs/source/run/parameters.rst
@@ -106,6 +106,9 @@ General parameters
 * ``hipace.depos_order_z`` (`int`) optional (default `0`)
     Transverse particle shape order. Currently, only `0` is implemented.
 
+* ``hipace.depos_derivative_type`` (`int`) optional (default `2`)
+    Type of derivative used in explicit deposition. `0`: analytic, `1`: nodal, `2`: centered
+
 * ``hipace.outer_depos_loop`` (`bool`) optional (default `0`)
     If the loop over depos_order is included in the loop over particles.
 

diff --git a/src/Hipace.H b/src/Hipace.H
@@ -313,6 +313,9 @@ public:
     /** Order of the field gather and current deposition shape factor in the longitudinal direction
      */
     static int m_depos_order_z;
+    /** Type of derivative used in explicit deposition. 0: analytic, 1: nodal, 2: centered
+     */
+    static int m_depos_derivative_type;
     /* if the loop over depos_order is included in the loop over particles
      */
     static bool m_outer_depos_loop;

diff --git a/src/Hipace.cpp b/src/Hipace.cpp
@@ -49,6 +49,7 @@ amrex::Real Hipace::m_initial_time = 0.0;
 int Hipace::m_verbose = 0;
 int Hipace::m_depos_order_xy = 2;
 int Hipace::m_depos_order_z = 0;
+int Hipace::m_depos_derivative_type = 2;
 bool Hipace::m_outer_depos_loop = false;
 amrex::Real Hipace::m_predcorr_B_error_tolerance = 4e-2;
 int Hipace::m_predcorr_max_iterations = 30;
@@ -128,6 +129,9 @@ Hipace::Hipace () :
                             "Multiple boxes per rank only implemented for one rank.");
     queryWithParser(pph, "depos_order_xy", m_depos_order_xy);
     queryWithParser(pph, "depos_order_z", m_depos_order_z);
+    queryWithParser(pph, "depos_derivative_type", m_depos_derivative_type);
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_depos_order_xy != 0 || m_depos_derivative_type != 0,
+                            "Analytic derivative with depos_order=0 would vanish");
     queryWithParser(pph, "outer_depos_loop", m_outer_depos_loop);
     queryWithParser(pph, "predcorr_B_error_tolerance", m_predcorr_B_error_tolerance);
     queryWithParser(pph, "predcorr_max_iterations", m_predcorr_max_iterations);

diff --git a/src/particles/deposition/ExplicitDeposition.cpp b/src/particles/deposition/ExplicitDeposition.cpp
@@ -9,6 +9,7 @@
 
 #include "Hipace.H"
 #include "particles/particles_utils/ShapeFactors.H"
+#include "particles/particles_utils/FieldGather.H"
 #include "utils/Constants.H"
 #include "utils/HipaceProfilerWrapper.H"
 #include "utils/GPUUtil.H"
@@ -47,10 +48,10 @@ ExplicitDeposition (PlasmaParticleContainer& plasma, Fields& fields, const Multi
         int const * const AMREX_RESTRICT a_ion_lev =
             plasma.m_can_ionize ? soa.GetIntData(PlasmaIdx::ion_lev).data() : nullptr;
 
-        // Construct empty Array4 with one z slice so that Array3 constructor works for no laser
+        // Construct empty Array4 if there is no laser
         const Array3<const amrex::Real> a_laser_arr = multi_laser.m_use_laser ?
             multi_laser.getSlices(WhichLaserSlice::n00j00).const_array(pti) :
-            amrex::Array4<const amrex::Real>(nullptr, {0,0,0}, {0,0,1}, 0);
+            amrex::Array4<const amrex::Real>();
 
         const amrex::Real x_pos_offset = GetPosOffset(0, gm, isl_fab.box());
         const amrex::Real y_pos_offset = GetPosOffset(1, gm, isl_fab.box());
@@ -62,27 +63,37 @@ ExplicitDeposition (PlasmaParticleContainer& plasma, Fields& fields, const Multi
 
         const PhysConst pc = get_phys_const();
         const amrex::Real clight = pc.c;
+        const amrex::Real clight_inv = 1._rt/pc.c;
         // The laser a0 is always normalized
         const amrex::Real a_laser_fac = (pc.m_e/pc.q_e) * (pc.m_e/pc.q_e);
         const amrex::Real charge_invvol_mu0 = plasma.m_charge * invvol * pc.mu0;
         const amrex::Real charge_mass_ratio = plasma.m_charge / plasma.m_mass;
 
         amrex::ParallelFor(
-            amrex::TypeList<amrex::CompileTimeOptions<0, 1, 2, 3>,      // depos_order
-                            amrex::CompileTimeOptions<false, true>,     // can_ionize
-                            amrex::CompileTimeOptions<false, true>>{},  // use_laser
-            {Hipace::m_depos_order_xy, plasma.m_can_ionize, multi_laser.m_use_laser},
+            amrex::TypeList<
+                amrex::CompileTimeOptions<0, 1, 2, 3>,  // depos_order
+                amrex::CompileTimeOptions<0, 1, 2>,     // derivative_type
+                amrex::CompileTimeOptions<false, true>, // can_ionize
+                amrex::CompileTimeOptions<false, true>  // use_laser
+            >{}, {
+                Hipace::m_depos_order_xy,
+                Hipace::m_depos_derivative_type,
+                plasma.m_can_ionize,
+                multi_laser.m_use_laser
+            },
             pti.numParticles(),
-            [=] AMREX_GPU_DEVICE (int ip, auto a_depos_order, auto can_ionize, auto use_laser) {
+            [=] AMREX_GPU_DEVICE (int ip, auto a_depos_order, auto a_derivative_type,
+                                          auto can_ionize, auto use_laser) noexcept {
                 constexpr int depos_order = a_depos_order.value;
+                constexpr int derivative_type = a_derivative_type.value;
 
                 const auto positions = pos_structs[ip];
                 if (positions.id() < 0) return;
-                const amrex::Real psi = psip[ip];
+                const amrex::Real psi_inv = 1._rt/psip[ip];
                 const amrex::Real xp = positions.pos(0);
                 const amrex::Real yp = positions.pos(1);
-                const amrex::Real vx = uxp[ip] / (psi * clight);
-                const amrex::Real vy = uyp[ip] / (psi * clight);
+                const amrex::Real vx = uxp[ip] * psi_inv * clight_inv;
+                const amrex::Real vy = uyp[ip] * psi_inv * clight_inv;
 
                 amrex::Real q_invvol_mu0 = charge_invvol_mu0;
                 amrex::Real q_mass_ratio = charge_mass_ratio;
@@ -96,75 +107,46 @@ ExplicitDeposition (PlasmaParticleContainer& plasma, Fields& fields, const Multi
                 const amrex::Real charge_density_mu0 = q_invvol_mu0 * wp[ip];
 
                 const amrex::Real xmid = (xp - x_pos_offset) * dx_inv;
-                amrex::Real sx_cell[depos_order + 1];
-                const int i_cell = compute_shape_factor<depos_order>(sx_cell, xmid);
-
-                // y direction
                 const amrex::Real ymid = (yp - y_pos_offset) * dy_inv;
-                amrex::Real sy_cell[depos_order + 1];
-                const int j_cell = compute_shape_factor<depos_order>(sy_cell, ymid);
 
                 amrex::Real Aabssqp = 0._rt;
                 // Rename variable for NVCC lambda capture to work
                 [[maybe_unused]] auto laser_arr = a_laser_arr;
                 if constexpr (use_laser.value) {
-                    for (int iy=0; iy<=depos_order; iy++){
-                        for (int ix=0; ix<=depos_order; ix++){
-                            // Its important that Aabssqp is first fully gathered and not used
-                            // directly per cell like AabssqDxp and AabssqDyp
-                            const amrex::Real x00y00 = abssq(
-                                laser_arr(i_cell+ix, j_cell+iy, 0),
-                                laser_arr(i_cell+ix, j_cell+iy, 1) );
-                            Aabssqp += sx_cell[ix]*sy_cell[iy]*x00y00;
-                        }
-                    }
+                    // Its important that Aabssqp is first fully gathered and not used
+                    // directly per cell like AabssqDxp and AabssqDyp
+                    doLaserGatherShapeN<depos_order>(xp, yp, Aabssqp, laser_arr,
+                                                     dx_inv, dy_inv, x_pos_offset, y_pos_offset);
                 }
 
                 // calculate gamma/psi for plasma particles
                 const amrex::Real gamma_psi = 0.5_rt * (
-                    (1._rt + 0.5_rt * Aabssqp) / (psi * psi) // TODO: fix units
+                    (1._rt + 0.5_rt * Aabssqp) * psi_inv * psi_inv // TODO: fix units
                     + vx * vx
                     + vy * vy
                     + 1._rt
                 );
 
-                for (int iy=0; iy <= depos_order+2; ++iy) {
-                    // normal shape factor
-                    amrex::Real shape_y = 0._rt;
-                    // derivative shape factor
-                    amrex::Real shape_dy = 0._rt;
-                    if (iy != 0 && iy != depos_order + 2) {
-                        shape_y = sy_cell[iy-1] * charge_density_mu0;
-                    }
-                    if (iy < depos_order + 1) {
-                        shape_dy = sy_cell[iy];
-                    }
-                    if (iy > 1) {
-                        shape_dy -= sy_cell[iy-2];
-                    }
-                    shape_dy *= dy_inv * 0.5_rt * clight * charge_density_mu0;
-
-                    for (int ix=0; ix <= depos_order+2; ++ix) {
-                        amrex::Real shape_x = 0._rt;
-                        amrex::Real shape_dx = 0._rt;
-                        if (ix != 0 && ix != depos_order + 2) {
-                            shape_x = sx_cell[ix-1];
-                        }
-                        if (ix < depos_order + 1) {
-                            shape_dx = sx_cell[ix];
-                        }
-                        if (ix > 1) {
-                            shape_dx -= sx_cell[ix-2];
-                        }
-                        shape_dx *= dx_inv * 0.5_rt * clight;
-
-                        if ((ix==0 || ix==depos_order + 2) && (iy==0 || iy==depos_order + 2)) {
-                            // corners have a shape factor of zero
-                            continue;
+#ifdef AMREX_USE_GPU
+#pragma unroll
+#endif
+                for (int iy=0; iy <= depos_order+derivative_type; ++iy) {
+#ifdef AMREX_USE_GPU
+#pragma unroll
+#endif
+                    for (int ix=0; ix <= depos_order+derivative_type; ++ix) {
+
+                        if constexpr (derivative_type == 2) {
+                            if ((ix==0 || ix==depos_order + 2) && (iy==0 || iy==depos_order + 2)) {
+                                // corners have a shape factor of zero
+                                continue;
+                            }
                         }
 
-                        const int i = i_cell + ix - 1;
-                        const int j = j_cell + iy - 1;
+                        auto [shape_y, shape_dy, j] =
+                            single_derivative_shape_factor<derivative_type, depos_order>(ymid, iy);
+                        auto [shape_x, shape_dx, i] =
+                            single_derivative_shape_factor<derivative_type, depos_order>(xmid, ix);
 
                         // get fields per cell instead of gathering them to avoid blurring
                         const amrex::Real Bz_v = arr(i,j,Bz);
@@ -192,38 +174,37 @@ ExplicitDeposition (PlasmaParticleContainer& plasma, Fields& fields, const Multi
                             AabssqDyp = (x00yp1-x00ym1) * 0.5_rt * dy_inv * laser_fac * clight;
                         }
 
-                        amrex::Gpu::Atomic::Add(arr.ptr(i, j, Sy),
+                        amrex::Gpu::Atomic::Add(arr.ptr(i, j, Sy), charge_density_mu0 * (
                             - shape_x * shape_y * (
                                 - Bz_v * vx
                                 + ( Ez_v * vy
                                 + ExmBy_v * (          - vx * vy)
-                                + EypBx_v * (gamma_psi - vy * vy) ) / clight
-                                - 0.25_rt * AabssqDyp * q_mass_ratio / psi
-                            ) * q_mass_ratio / psi
-                            - shape_dx * shape_y * (
+                                + EypBx_v * (gamma_psi - vy * vy) ) * clight_inv
+                                - 0.25_rt * AabssqDyp * q_mass_ratio * psi_inv
+                            ) * q_mass_ratio * psi_inv
+                            + ( - shape_dx * shape_y * dx_inv * (
                                 - vx * vy
                             )
-                            - shape_x * shape_dy * (
+                            - shape_x * shape_dy * dy_inv * (
                                 gamma_psi - vy * vy - 1._rt
-                            )
-                        );
+                            )) * clight
+                        ));
 
-                        amrex::Gpu::Atomic::Add(arr.ptr(i, j, Sx),
+                        amrex::Gpu::Atomic::Add(arr.ptr(i, j, Sx), charge_density_mu0 * (
                             + shape_x * shape_y * (
                                 + Bz_v * vy
                                 + ( Ez_v * vx
                                 + ExmBy_v * (gamma_psi - vx * vx)
-                                + EypBx_v * (          - vx * vy) ) / clight
-                                - 0.25_rt * AabssqDxp * q_mass_ratio / psi
-                            ) * q_mass_ratio / psi
-                            + shape_dx * shape_y * (
+                                + EypBx_v * (          - vx * vy) ) * clight_inv
+                                - 0.25_rt * AabssqDxp * q_mass_ratio * psi_inv
+                            ) * q_mass_ratio * psi_inv
+                            + ( + shape_dx * shape_y * dx_inv * (
                                 gamma_psi - vx * vx - 1._rt
                             )
-                            + shape_x * shape_dy * (
+                            + shape_x * shape_dy * dy_inv * (
                                 - vx * vy
-                            )
-                        );
-
+                            )) * clight
+                        ));
                     }
                 }
             });

diff --git a/src/particles/deposition/PlasmaDepositCurrent.cpp b/src/particles/deposition/PlasmaDepositCurrent.cpp
@@ -94,6 +94,7 @@ DepositCurrent (PlasmaParticleContainer& plasma, Fields & fields, const MultiLas
         const amrex::Real dy_inv = 1._rt/dx[1];
 
         const PhysConst pc = get_phys_const();
+        const amrex::Real clight = pc.c;
         const amrex::Real clightinv = 1.0_rt/pc.c;
         const amrex::Real charge_invvol = charge * invvol;
         const amrex::Real charge_mu0_mass_ratio = charge * pc.mu0 / mass;
@@ -168,8 +169,7 @@ DepositCurrent (PlasmaParticleContainer& plasma, Fields & fields, const MultiLas
                     amrex::CompileTimeOptions<false, true>, // do_tiling
 #endif
                     amrex::CompileTimeOptions<false, true>  // use_laser
-                >{},
-                {
+                >{}, {
                     Hipace::m_depos_order_xy,
                     Hipace::m_outer_depos_loop,
                     plasma.m_can_ionize,
@@ -199,11 +199,11 @@ DepositCurrent (PlasmaParticleContainer& plasma, Fields & fields, const MultiLas
 
                 const auto positions = pos_structs[ip];
                 if (positions.id() < 0) return;
-                const amrex::Real psi = psip[ip];
+                const amrex::Real psi_inv = 1._rt/psip[ip];
                 const amrex::Real xp = positions.pos(0);
                 const amrex::Real yp = positions.pos(1);
-                const amrex::Real vx_c = uxp[ip] / psi;
-                const amrex::Real vy_c = uyp[ip] / psi;
+                const amrex::Real vx_c = uxp[ip] * psi_inv;
+                const amrex::Real vy_c = uyp[ip] * psi_inv;
 
                 // calculate charge of the plasma particles
                 amrex::Real q_invvol = charge_invvol;
@@ -226,7 +226,7 @@ DepositCurrent (PlasmaParticleContainer& plasma, Fields & fields, const MultiLas
 
                 // calculate gamma/psi for plasma particles
                 const amrex::Real gamma_psi = 0.5_rt * (
-                    (1._rt + 0.5_rt * Aabssqp) / (psi * psi) // TODO: fix units
+                    (1._rt + 0.5_rt * Aabssqp) * psi_inv * psi_inv // TODO: fix units
                     + vx_c * vx_c * clightinv * clightinv
                     + vy_c * vy_c * clightinv * clightinv
                     + 1._rt
@@ -269,9 +269,9 @@ DepositCurrent (PlasmaParticleContainer& plasma, Fields & fields, const MultiLas
                         // wqx, wqy wqz are particle current in each direction
                         const amrex::Real wqx  = charge_density * vx_c;
                         const amrex::Real wqy  = charge_density * vy_c;
-                        const amrex::Real wqz  = charge_density * (gamma_psi-1._rt) / clightinv;
+                        const amrex::Real wqz  = charge_density * (gamma_psi-1._rt) * clight;
                         const amrex::Real wq   = charge_density * gamma_psi;
-                        const amrex::Real wchi = charge_density * q_mu0_mass_ratio / psi;
+                        const amrex::Real wchi = charge_density * q_mu0_mass_ratio * psi_inv;
 
                         // Deposit current into isl_arr
                         if (jx != -1) { // deposit_jx_jy