23 const GpuArray<Real,AMREX_SPACEDIM> ,
const Array4<const Real>& mskr,
24 const Array4<const Real>& msku,
const Array4<const Real>& mskv,
25 const Array4<const Real>& calc_arr,
26 int icomp,
int ncomp, Real ,
int bccomp,
int n_not_fill)
29 const auto& dom_lo = amrex::lbound(domain);
30 const auto& dom_hi = amrex::ubound(domain);
35 Vector<BCRec> bcrs(ncomp);
45 amrex::Gpu::DeviceVector<BCRec> bcrs_d(ncomp);
47 Gpu::htod_memcpy_async(bcrs_d.data(), bcrs.data(),
sizeof(BCRec)*ncomp);
49 std::memcpy(bcrs_d.data(), bcrs.data(),
sizeof(BCRec)*ncomp);
51 const amrex::BCRec* bc_ptr = bcrs_d.data();
53 GpuArray<GpuArray<Real, AMREX_SPACEDIM*2>,AMREX_SPACEDIM+
NCONS+8> l_bc_extdir_vals_d;
55 for (
int i = 0; i < ncomp; i++) {
56 for (
int ori = 0; ori < 2*AMREX_SPACEDIM; ori++) {
61 GeometryData
const& geomdata =
m_geom.data();
62 bool is_periodic_in_x = geomdata.isPeriodic(0);
63 bool is_periodic_in_y = geomdata.isPeriodic(1);
64 const Real eps= 1.0e-20_rt;
67 if (!is_periodic_in_x)
69 Box bx_xlo(bx); bx_xlo.setBig (0,dom_lo.x-1);
70 bx_xlo.setSmall(1,std::max(valid_bx.smallEnd(1)-1,dom_lo.y)); bx_xlo.setBig(1,std::min(valid_bx.bigEnd(1)+1,dom_hi.y));
71 Box bx_xhi(bx); bx_xhi.setSmall(0,dom_hi.x+1);
72 bx_xhi.setSmall(1,std::max(valid_bx.smallEnd(1)-1,dom_lo.y)); bx_xhi.setBig(1,std::min(valid_bx.bigEnd(1)+1,dom_hi.y));
74 bx_xlo, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
76 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][0] * mskr(i,j,0);
78 Real grad_lo = (calc_arr(dom_lo.x ,j ,k,icomp+n) - calc_arr(dom_lo.x ,j-1,k,icomp+n)) * mskv(i,j,0);
79 Real grad_lo_jp1 = (calc_arr(dom_lo.x ,j+1,k,icomp+n) - calc_arr(dom_lo.x ,j ,k,icomp+n)) * mskv(i,j,0);
80 Real dTdt = calc_arr(dom_lo.x,j,k,icomp+n) - dest_arr(dom_lo.x ,j,k,icomp+n);
81 Real dTdx = dest_arr(dom_lo.x,j,k,icomp+n) - dest_arr(dom_lo.x+1,j,k,icomp+n);
82 if (dTdt*dTdx < 0.0_rt) dTdt = 0.0_rt;
83 Real dTde = (dTdt * (grad_lo+grad_lo_jp1) > 0.0_rt) ? grad_lo : grad_lo_jp1;
84 Real cff = std::max(dTdx*dTdx+dTde*dTde,eps);
85 Real Cx = dTdt * dTdx;
86 dest_arr(i,j,k,icomp+n) = (cff * calc_arr(dom_lo.x-1,j,k,icomp+n) + Cx * dest_arr(dom_lo.x,j,k,icomp+n)) * mskr(i,j,0) / (cff+Cx);
89 bx_xhi, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
91 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][3] * mskr(i,j,0);
93 Real grad_hi = (calc_arr(dom_hi.x ,j ,k,icomp+n) - calc_arr(dom_hi.x ,j-1,k,icomp+n)) * mskv(i,j,0);
94 Real grad_hi_jp1 = (calc_arr(dom_hi.x ,j+1,k,icomp+n) - calc_arr(dom_hi.x ,j ,k,icomp+n)) * mskv(i,j,0);
95 Real dTdt = calc_arr(dom_hi.x,j,k,icomp+n) - dest_arr(dom_hi.x ,j,k,icomp+n);
96 Real dTdx = dest_arr(dom_hi.x,j,k,icomp+n) - dest_arr(dom_hi.x-1,j,k,icomp+n);
97 if (dTdt * dTdx < 0.0_rt) dTdt = 0.0_rt;
98 Real dTde = (dTdt * (grad_hi + grad_hi_jp1) > 0.0_rt) ? grad_hi : grad_hi_jp1;
99 Real cff = std::max(dTdx*dTdx + dTde*dTde,eps);
100 Real Cx = dTdt * dTdx;
101 dest_arr(i,j,k,icomp+n) = (cff * calc_arr(dom_hi.x+1,j,k,icomp+n) + Cx * dest_arr(dom_hi.x,j,k,icomp+n)) * mskr(i,j,0) / (cff+Cx);
107 if (!is_periodic_in_y)
109 Box bx_ylo(bx); bx_ylo.setBig (1,dom_lo.y-1);
110 bx_ylo.setSmall(0,std::max(valid_bx.smallEnd(0)-1,dom_lo.x)); bx_ylo.setBig(0,std::min(valid_bx.bigEnd(0)+1,dom_hi.x));
111 Box bx_yhi(bx); bx_yhi.setSmall(1,dom_hi.y+1);
112 bx_yhi.setSmall(0,std::max(valid_bx.smallEnd(0)-1,dom_lo.x)); bx_yhi.setBig(0,std::min(valid_bx.bigEnd(0)+1,dom_hi.x));
114 bx_ylo, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
116 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][1] * mskr(i,j,0);
118 Real grad_lo = (calc_arr(i ,dom_lo.y, k,icomp+n) - calc_arr(i-1,dom_lo.y ,k,icomp+n)) * msku(i,j,0);
119 Real grad_lo_ip1 = (calc_arr(i+1,dom_lo.y ,k,icomp+n) - calc_arr(i ,dom_lo.y ,k,icomp+n)) * msku(i,j,0);
120 Real dTdt = calc_arr(i,dom_lo.y,k,icomp+n) - dest_arr(i,dom_lo.y ,k,icomp+n);
121 Real dTde = dest_arr(i,dom_lo.y,k,icomp+n) - dest_arr(i,dom_lo.y+1,k,icomp+n);
122 if (dTdt * dTde < 0.0_rt) dTdt = 0.0_rt;
123 Real dTdx = (dTdt * (grad_lo + grad_lo_ip1) > 0.0_rt) ? grad_lo : grad_lo_ip1;
124 Real cff = std::max(dTdx*dTdx + dTde*dTde, eps);
126 dest_arr(i,j,k,icomp+n) = (cff * calc_arr(i,dom_lo.y-1,k,icomp+n) + Ce * dest_arr(i,dom_lo.y,k,icomp+n)) * mskr(i,j,0) / (cff+Ce);
129 bx_yhi, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
131 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][4] * mskr(i,j,0);
133 Real grad_hi = (calc_arr(i ,dom_hi.y ,k,icomp+n) - calc_arr(i-1,dom_hi.y ,k,icomp+n)) * msku(i,j,0);
134 Real grad_hi_ip1 = (calc_arr(i+1,dom_hi.y ,k,icomp+n) - calc_arr(i ,dom_hi.y ,k,icomp+n)) * msku(i,j,0);
135 Real dTdt = calc_arr(i,dom_hi.y,k,icomp+n) - dest_arr(i,dom_hi.y ,k,icomp+n);
136 Real dTde = dest_arr(i,dom_hi.y,k,icomp+n) - dest_arr(i,dom_hi.y-1,k,icomp+n);
137 if (dTdt * dTde < 0.0_rt) dTdt = 0.0_rt;
138 Real dTdx = (dTdt * (grad_hi + grad_hi_ip1) > 0.0_rt) ? grad_hi : grad_hi_ip1;
139 Real cff = std::max(dTdx*dTdx + dTde*dTde, eps);
141 dest_arr(i,j,k,icomp+n) = (cff*calc_arr(i,dom_hi.y+1,k,icomp+n) + Ce*dest_arr(i,dom_hi.y,k,icomp+n)) * mskr(i,j,0) / (cff+Ce);
148 Box bx_zlo(bx); bx_zlo.setBig (2,dom_lo.z-1);
149 Box bx_zhi(bx); bx_zhi.setSmall(2,dom_hi.z+1);
151 bx_zlo, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
153 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][2] * mskr(i,j,0);
156 bx_zhi, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
158 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][5] * mskr(i,j,0);
164 Box bx_xlo(bx); bx_xlo.setBig (0,dom_lo.x-1-n_not_fill);
165 bx_xlo.setSmall(2,std::max(dom_lo.z,bx.smallEnd(2)));
166 bx_xlo.setBig (2,std::min(dom_hi.z,bx.bigEnd(2)));
167 Box bx_xhi(bx); bx_xhi.setSmall(0,dom_hi.x+1+n_not_fill);
168 bx_xhi.setSmall(2,std::max(dom_lo.z,bx.smallEnd(2)));
169 bx_xhi.setBig (2,std::min(dom_hi.z,bx.bigEnd(2)));
170 Box bx_ylo(bx); bx_ylo.setBig (1,dom_lo.y-1-n_not_fill);
171 bx_ylo.setSmall(2,std::max(dom_lo.z,bx.smallEnd(2)));
172 bx_ylo.setBig (2,std::min(dom_hi.z,bx.bigEnd(2)));
173 Box bx_yhi(bx); bx_yhi.setSmall(1,dom_hi.y+1+n_not_fill);
174 bx_yhi.setSmall(2,std::max(dom_lo.z,bx.smallEnd(2)));
175 bx_yhi.setBig (2,std::min(dom_hi.z,bx.bigEnd(2)));
177 Box xlo_ylo = bx_xlo & bx_ylo;
178 Box xhi_ylo = bx_xhi & bx_ylo;
179 Box xlo_yhi = bx_xlo & bx_yhi;
180 Box xhi_yhi = bx_xhi & bx_yhi;
190 ParallelFor(bx_xlo, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
191 int iflip = dom_lo.x - 1 - i;
195 dest_arr(i,j,k,icomp+n) = dest_arr(dom_lo.x-n_not_fill-inner,j,k,icomp+n);
197 dest_arr(i,j,k,icomp+n) = dest_arr(iflip,j,k,icomp+n);
199 dest_arr(i,j,k,icomp+n) = -dest_arr(iflip,j,k,icomp+n);
202 bx_xhi, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
203 int iflip = 2*dom_hi.x + 1 - i;
207 dest_arr(i,j,k,icomp+n) = dest_arr(dom_hi.x+n_not_fill+inner,j,k,icomp+n);
209 dest_arr(i,j,k,icomp+n) = dest_arr(iflip,j,k,icomp+n);
211 dest_arr(i,j,k,icomp+n) = -dest_arr(iflip,j,k,icomp+n);
221 bx_ylo, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
222 int jflip = dom_lo.y - 1 - j;
226 dest_arr(i,j,k,icomp+n) = dest_arr(i,dom_lo.y-n_not_fill-inner,k,icomp+n);
228 dest_arr(i,j,k,icomp+n) = dest_arr(i,jflip,k,icomp+n);
230 dest_arr(i,j,k,icomp+n) = -dest_arr(i,jflip,k,icomp+n);
233 bx_yhi, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
234 int jflip = 2*dom_hi.y + 1 - j;
238 dest_arr(i,j,k,icomp+n) = dest_arr(i,dom_hi.y+n_not_fill+inner,k,icomp+n);
240 dest_arr(i,j,k,icomp+n) = dest_arr(i,jflip,k,icomp+n);
242 dest_arr(i,j,k,icomp+n) = -dest_arr(i,jflip,k,icomp+n);
249 Box bx_zlo(bx); bx_zlo.setBig (2,std::max(dom_lo.z-1,bx.smallEnd(2)));
250 Box bx_zhi(bx); bx_zhi.setSmall(2,std::min(dom_hi.z+1,bx.bigEnd(2)));
254 ParallelFor(bx_zlo, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
256 int kflip = dom_lo.z - 1 - i;
258 dest_arr(i,j,k,icomp+n) = dest_arr(i,j,dom_lo.z,icomp+n);
260 dest_arr(i,j,k,icomp+n) = dest_arr(i,j,kflip,icomp+n);
262 dest_arr(i,j,k,icomp+n) = -dest_arr(i,j,kflip,icomp+n);
268 ParallelFor(bx_zhi, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
270 int kflip = 2*dom_hi.z + 1 - i;
272 dest_arr(i,j,k,icomp+n) = dest_arr(i,j,dom_hi.z,icomp+n);
274 dest_arr(i,j,k,icomp+n) = dest_arr(i,j,kflip,icomp+n);
276 dest_arr(i,j,k,icomp+n) = -dest_arr(i,j,kflip,icomp+n);
283 if (!xlo_ylo.isEmpty()) {
284 ParallelFor(xlo_ylo, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
290 dest_arr(i,j,k,icomp+n) = 0.5 * (dest_arr(i,dom_lo.y,k,icomp+n)
291 + dest_arr(dom_lo.x,j,k,icomp+n));
295 if (!xlo_yhi.isEmpty()) {
296 ParallelFor(xlo_yhi, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
302 dest_arr(i,j,k,icomp+n) = 0.5 * (dest_arr(i,dom_hi.y,k,icomp+n)
303 + dest_arr(dom_lo.x,j,k,icomp+n));
307 if (!xhi_ylo.isEmpty()) {
308 ParallelFor(xhi_ylo, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
314 dest_arr(i,j,k,icomp+n) = 0.5 * (dest_arr(i,dom_lo.y,k,icomp+n)
315 + dest_arr(dom_hi.x,j,k,icomp+n));
319 if (!xhi_yhi.isEmpty()) {
320 ParallelFor(xhi_yhi, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
326 dest_arr(i,j,k,icomp+n) = 0.5 * (dest_arr(i,dom_hi.y,k,icomp+n)
327 + dest_arr(dom_hi.x,j,k,icomp+n));
333 Gpu::streamSynchronize();