23 const GpuArray<Real,AMREX_SPACEDIM> ,
const Array4<const Real>& mskr,
24 const Array4<const Real>& msku,
const Array4<const Real>& mskv,
25 const Array4<const Real>& calc_arr,
26 int icomp,
int ncomp, Real ,
int bccomp,
int n_not_fill)
29 const auto& dom_lo = amrex::lbound(domain);
30 const auto& dom_hi = amrex::ubound(domain);
35 Vector<BCRec> bcrs(ncomp);
45 amrex::Gpu::DeviceVector<BCRec> bcrs_d(ncomp);
47 Gpu::htod_memcpy_async(bcrs_d.data(), bcrs.data(),
sizeof(BCRec)*ncomp);
49 std::memcpy(bcrs_d.data(), bcrs.data(),
sizeof(BCRec)*ncomp);
51 const amrex::BCRec* bc_ptr = bcrs_d.data();
53 GpuArray<GpuArray<Real, AMREX_SPACEDIM*2>,AMREX_SPACEDIM+
NCONS+8> l_bc_extdir_vals_d;
55 for (
int i = 0; i < ncomp; i++) {
56 for (
int ori = 0; ori < 2*AMREX_SPACEDIM; ori++) {
61 GeometryData
const& geomdata =
m_geom.data();
62 bool is_periodic_in_x = geomdata.isPeriodic(0);
63 bool is_periodic_in_y = geomdata.isPeriodic(1);
64 const Real eps= 1.0e-20_rt;
73 Box dest_arr_box = Box(dest_arr);
75 if (!is_periodic_in_x)
77 Box bx_xlo(bx); bx_xlo.setBig (0,dom_lo.x-1);
78 bx_xlo.setSmall(1,std::max(valid_bx.smallEnd(1)-1,dom_lo.y)); bx_xlo.setBig(1,std::min(valid_bx.bigEnd(1)+1,dom_hi.y));
79 Box bx_xhi(bx); bx_xhi.setSmall(0,dom_hi.x+1);
80 bx_xhi.setSmall(1,std::max(valid_bx.smallEnd(1)-1,dom_lo.y)); bx_xhi.setBig(1,std::min(valid_bx.bigEnd(1)+1,dom_hi.y));
82 bx_xlo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
84 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][0] * mskr(i,j,0);
86 Real grad_lo = (calc_arr(dom_lo.x ,j ,k,icomp_calc+n) - calc_arr(dom_lo.x ,j-1,k,icomp_calc+n)) * mskv(i,j,0);
87 Real grad_lo_jp1 = (calc_arr(dom_lo.x ,j+1,k,icomp_calc+n) - calc_arr(dom_lo.x ,j ,k,icomp_calc+n)) * mskv(i,j,0);
88 Real dTdt = calc_arr(dom_lo.x,j,k,icomp_calc+n) - dest_arr(dom_lo.x ,j,k,icomp+n);
89 Real dTdx = dest_arr(dom_lo.x,j,k,icomp+n) - dest_arr(dom_lo.x+1,j,k,icomp+n);
90 if (dTdt*dTdx < 0.0_rt) dTdt = 0.0_rt;
91 Real dTde = (dTdt * (grad_lo+grad_lo_jp1) > 0.0_rt) ? grad_lo : grad_lo_jp1;
92 Real cff = std::max(dTdx*dTdx+dTde*dTde,eps);
93 Real Cx = dTdt * dTdx;
94 dest_arr(i,j,k,icomp+n) = (cff * calc_arr(dom_lo.x-1,j,k,icomp_calc+n) + Cx * dest_arr(dom_lo.x,j,k,icomp+n)) * mskr(i,j,0) / (cff+Cx);
97 bx_xhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
99 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][3] * mskr(i,j,0);
101 Real grad_hi = (calc_arr(dom_hi.x ,j ,k,icomp_calc+n) - calc_arr(dom_hi.x ,j-1,k,icomp_calc+n)) * mskv(i,j,0);
102 Real grad_hi_jp1 = (calc_arr(dom_hi.x ,j+1,k,icomp_calc+n) - calc_arr(dom_hi.x ,j ,k,icomp_calc+n)) * mskv(i,j,0);
103 Real dTdt = calc_arr(dom_hi.x,j,k,icomp_calc+n) - dest_arr(dom_hi.x ,j,k,icomp+n);
104 Real dTdx = dest_arr(dom_hi.x,j,k,icomp+n) - dest_arr(dom_hi.x-1,j,k,icomp+n);
105 if (dTdt * dTdx < 0.0_rt) dTdt = 0.0_rt;
106 Real dTde = (dTdt * (grad_hi + grad_hi_jp1) > 0.0_rt) ? grad_hi : grad_hi_jp1;
107 Real cff = std::max(dTdx*dTdx + dTde*dTde,eps);
108 Real Cx = dTdt * dTdx;
109 dest_arr(i,j,k,icomp+n) = (cff * calc_arr(dom_hi.x+1,j,k,icomp_calc+n) + Cx * dest_arr(dom_hi.x,j,k,icomp+n)) * mskr(i,j,0) / (cff+Cx);
115 if (!is_periodic_in_y)
117 Box bx_ylo(bx); bx_ylo.setBig (1,dom_lo.y-1);
118 bx_ylo.setSmall(0,std::max(valid_bx.smallEnd(0)-1,dom_lo.x)); bx_ylo.setBig(0,std::min(valid_bx.bigEnd(0)+1,dom_hi.x));
119 Box bx_yhi(bx); bx_yhi.setSmall(1,dom_hi.y+1);
120 bx_yhi.setSmall(0,std::max(valid_bx.smallEnd(0)-1,dom_lo.x)); bx_yhi.setBig(0,std::min(valid_bx.bigEnd(0)+1,dom_hi.x));
122 bx_ylo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
124 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][1] * mskr(i,j,0);
126 Real grad_lo = (calc_arr(i ,dom_lo.y, k,icomp_calc+n) - calc_arr(i-1,dom_lo.y ,k,icomp_calc+n)) * msku(i,j,0);
127 Real grad_lo_ip1 = (calc_arr(i+1,dom_lo.y ,k,icomp_calc+n) - calc_arr(i ,dom_lo.y ,k,icomp_calc+n)) * msku(i,j,0);
128 Real dTdt = calc_arr(i,dom_lo.y,k,icomp_calc+n) - dest_arr(i,dom_lo.y ,k,icomp+n);
129 Real dTde = dest_arr(i,dom_lo.y,k,icomp+n) - dest_arr(i,dom_lo.y+1,k,icomp+n);
130 if (dTdt * dTde < 0.0_rt) dTdt = 0.0_rt;
131 Real dTdx = (dTdt * (grad_lo + grad_lo_ip1) > 0.0_rt) ? grad_lo : grad_lo_ip1;
132 Real cff = std::max(dTdx*dTdx + dTde*dTde, eps);
134 dest_arr(i,j,k,icomp+n) = (cff * calc_arr(i,dom_lo.y-1,k,icomp_calc+n) + Ce * dest_arr(i,dom_lo.y,k,icomp+n)) * mskr(i,j,0) / (cff+Ce);
137 bx_yhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
139 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][4] * mskr(i,j,0);
141 Real grad_hi = (calc_arr(i ,dom_hi.y ,k,icomp_calc+n) - calc_arr(i-1,dom_hi.y ,k,icomp_calc+n)) * msku(i,j,0);
142 Real grad_hi_ip1 = (calc_arr(i+1,dom_hi.y ,k,icomp_calc+n) - calc_arr(i ,dom_hi.y ,k,icomp_calc+n)) * msku(i,j,0);
143 Real dTdt = calc_arr(i,dom_hi.y,k,icomp_calc+n) - dest_arr(i,dom_hi.y ,k,icomp+n);
144 Real dTde = dest_arr(i,dom_hi.y,k,icomp+n) - dest_arr(i,dom_hi.y-1,k,icomp+n);
145 if (dTdt * dTde < 0.0_rt) dTdt = 0.0_rt;
146 Real dTdx = (dTdt * (grad_hi + grad_hi_ip1) > 0.0_rt) ? grad_hi : grad_hi_ip1;
147 Real cff = std::max(dTdx*dTdx + dTde*dTde, eps);
149 dest_arr(i,j,k,icomp+n) = (cff*calc_arr(i,dom_hi.y+1,k,icomp_calc+n) + Ce*dest_arr(i,dom_hi.y,k,icomp+n)) * mskr(i,j,0) / (cff+Ce);
156 Box bx_zlo(bx); bx_zlo.setBig (2,dom_lo.z-1);
157 Box bx_zhi(bx); bx_zhi.setSmall(2,dom_hi.z+1);
159 bx_zlo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
161 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][2] * mskr(i,j,0);
164 bx_zhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
166 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][5] * mskr(i,j,0);
172 Box bx_xlo(bx); bx_xlo.setBig (0,dom_lo.x-1-n_not_fill);
173 bx_xlo.setSmall(2,std::max(dom_lo.z,bx.smallEnd(2)));
174 bx_xlo.setBig (2,std::min(dom_hi.z,bx.bigEnd(2)));
175 Box bx_xhi(bx); bx_xhi.setSmall(0,dom_hi.x+1+n_not_fill);
176 bx_xhi.setSmall(2,std::max(dom_lo.z,bx.smallEnd(2)));
177 bx_xhi.setBig (2,std::min(dom_hi.z,bx.bigEnd(2)));
178 Box bx_ylo(bx); bx_ylo.setBig (1,dom_lo.y-1-n_not_fill);
179 bx_ylo.setSmall(2,std::max(dom_lo.z,bx.smallEnd(2)));
180 bx_ylo.setBig (2,std::min(dom_hi.z,bx.bigEnd(2)));
181 Box bx_yhi(bx); bx_yhi.setSmall(1,dom_hi.y+1+n_not_fill);
182 bx_yhi.setSmall(2,std::max(dom_lo.z,bx.smallEnd(2)));
183 bx_yhi.setBig (2,std::min(dom_hi.z,bx.bigEnd(2)));
185 Box xlo_ylo = bx_xlo & bx_ylo;
186 Box xhi_ylo = bx_xhi & bx_ylo;
187 Box xlo_yhi = bx_xlo & bx_yhi;
188 Box xhi_yhi = bx_xhi & bx_yhi;
198 ParallelFor(bx_xlo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
199 int iflip = dom_lo.x - 1 - i;
203 dest_arr(i,j,k,icomp+n) = dest_arr(dom_lo.x-n_not_fill-inner,j,k,icomp+n);
205 dest_arr(i,j,k,icomp+n) = dest_arr(iflip,j,k,icomp+n);
207 dest_arr(i,j,k,icomp+n) = -dest_arr(iflip,j,k,icomp+n);
210 bx_xhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
211 int iflip = 2*dom_hi.x + 1 - i;
215 dest_arr(i,j,k,icomp+n) = dest_arr(dom_hi.x+n_not_fill+inner,j,k,icomp+n);
217 dest_arr(i,j,k,icomp+n) = dest_arr(iflip,j,k,icomp+n);
219 dest_arr(i,j,k,icomp+n) = -dest_arr(iflip,j,k,icomp+n);
229 bx_ylo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
230 int jflip = dom_lo.y - 1 - j;
234 dest_arr(i,j,k,icomp+n) = dest_arr(i,dom_lo.y-n_not_fill-inner,k,icomp+n);
236 dest_arr(i,j,k,icomp+n) = dest_arr(i,jflip,k,icomp+n);
238 dest_arr(i,j,k,icomp+n) = -dest_arr(i,jflip,k,icomp+n);
241 bx_yhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
242 int jflip = 2*dom_hi.y + 1 - j;
246 dest_arr(i,j,k,icomp+n) = dest_arr(i,dom_hi.y+n_not_fill+inner,k,icomp+n);
248 dest_arr(i,j,k,icomp+n) = dest_arr(i,jflip,k,icomp+n);
250 dest_arr(i,j,k,icomp+n) = -dest_arr(i,jflip,k,icomp+n);
257 Box bx_zlo(bx); bx_zlo.setBig (2,std::max(dom_lo.z-1,bx.smallEnd(2)));
258 Box bx_zhi(bx); bx_zhi.setSmall(2,std::min(dom_hi.z+1,bx.bigEnd(2)));
262 ParallelFor(bx_zlo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
264 int kflip = dom_lo.z - 1 - i;
266 dest_arr(i,j,k,icomp+n) = dest_arr(i,j,dom_lo.z,icomp+n);
268 dest_arr(i,j,k,icomp+n) = dest_arr(i,j,kflip,icomp+n);
270 dest_arr(i,j,k,icomp+n) = -dest_arr(i,j,kflip,icomp+n);
276 ParallelFor(bx_zhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
278 int kflip = 2*dom_hi.z + 1 - i;
280 dest_arr(i,j,k,icomp+n) = dest_arr(i,j,dom_hi.z,icomp+n);
282 dest_arr(i,j,k,icomp+n) = dest_arr(i,j,kflip,icomp+n);
284 dest_arr(i,j,k,icomp+n) = -dest_arr(i,j,kflip,icomp+n);
291 if (!xlo_ylo.isEmpty()) {
292 ParallelFor(xlo_ylo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
298 dest_arr(i,j,k,icomp+n) = 0.5 * (dest_arr(i,dom_lo.y,k,icomp+n)
299 + dest_arr(dom_lo.x,j,k,icomp+n));
303 if (!xlo_yhi.isEmpty()) {
304 ParallelFor(xlo_yhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
310 dest_arr(i,j,k,icomp+n) = 0.5 * (dest_arr(i,dom_hi.y,k,icomp+n)
311 + dest_arr(dom_lo.x,j,k,icomp+n));
315 if (!xhi_ylo.isEmpty()) {
316 ParallelFor(xhi_ylo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
322 dest_arr(i,j,k,icomp+n) = 0.5 * (dest_arr(i,dom_lo.y,k,icomp+n)
323 + dest_arr(dom_hi.x,j,k,icomp+n));
327 if (!xhi_yhi.isEmpty()) {
328 ParallelFor(xhi_yhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
334 dest_arr(i,j,k,icomp+n) = 0.5 * (dest_arr(i,dom_hi.y,k,icomp+n)
335 + dest_arr(dom_hi.x,j,k,icomp+n));
341 Gpu::streamSynchronize();