23 const GpuArray<Real,AMREX_SPACEDIM> ,
const Array4<const Real>& mskr,
24 const Array4<const Real>& msku,
const Array4<const Real>& mskv,
25 const Array4<const Real>& calc_arr,
26 int icomp,
int ncomp, Real ,
int bccomp,
int n_not_fill)
29 const auto& dom_lo = amrex::lbound(domain);
30 const auto& dom_hi = amrex::ubound(domain);
35 Vector<BCRec> bcrs(ncomp);
45 amrex::Gpu::DeviceVector<BCRec> bcrs_d(ncomp);
47 Gpu::htod_memcpy_async(bcrs_d.data(), bcrs.data(),
sizeof(BCRec)*ncomp);
49 std::memcpy(bcrs_d.data(), bcrs.data(),
sizeof(BCRec)*ncomp);
51 const amrex::BCRec* bc_ptr = bcrs_d.data();
53 GpuArray<GpuArray<Real, AMREX_SPACEDIM*2>,AMREX_SPACEDIM+
NCONS+8> l_bc_extdir_vals_d;
55 for (
int i = 0; i < ncomp; i++) {
56 for (
int ori = 0; ori < 2*AMREX_SPACEDIM; ori++) {
61 GeometryData
const& geomdata =
m_geom.data();
62 bool is_periodic_in_x = geomdata.isPeriodic(0);
63 bool is_periodic_in_y = geomdata.isPeriodic(1);
64 const Real eps= 1.0e-20_rt;
66 Box dest_arr_box = Box(dest_arr);
68 if (!is_periodic_in_x)
70 Box bx_xlo(bx); bx_xlo.setBig (0,dom_lo.x-1);
71 bx_xlo.setSmall(1,std::max(valid_bx.smallEnd(1)-1,dom_lo.y)); bx_xlo.setBig(1,std::min(valid_bx.bigEnd(1)+1,dom_hi.y));
72 Box bx_xhi(bx); bx_xhi.setSmall(0,dom_hi.x+1);
73 bx_xhi.setSmall(1,std::max(valid_bx.smallEnd(1)-1,dom_lo.y)); bx_xhi.setBig(1,std::min(valid_bx.bigEnd(1)+1,dom_hi.y));
75 bx_xlo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
77 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][0] * mskr(i,j,0);
79 Real grad_lo = (calc_arr(dom_lo.x ,j ,k,icomp+n) - calc_arr(dom_lo.x ,j-1,k,icomp+n)) * mskv(i,j,0);
80 Real grad_lo_jp1 = (calc_arr(dom_lo.x ,j+1,k,icomp+n) - calc_arr(dom_lo.x ,j ,k,icomp+n)) * mskv(i,j,0);
81 Real dTdt = calc_arr(dom_lo.x,j,k,icomp+n) - dest_arr(dom_lo.x ,j,k,icomp+n);
82 Real dTdx = dest_arr(dom_lo.x,j,k,icomp+n) - dest_arr(dom_lo.x+1,j,k,icomp+n);
83 if (dTdt*dTdx < 0.0_rt) dTdt = 0.0_rt;
84 Real dTde = (dTdt * (grad_lo+grad_lo_jp1) > 0.0_rt) ? grad_lo : grad_lo_jp1;
85 Real cff = std::max(dTdx*dTdx+dTde*dTde,eps);
86 Real Cx = dTdt * dTdx;
87 dest_arr(i,j,k,icomp+n) = (cff * calc_arr(dom_lo.x-1,j,k,icomp+n) + Cx * dest_arr(dom_lo.x,j,k,icomp+n)) * mskr(i,j,0) / (cff+Cx);
90 bx_xhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
92 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][3] * mskr(i,j,0);
94 Real grad_hi = (calc_arr(dom_hi.x ,j ,k,icomp+n) - calc_arr(dom_hi.x ,j-1,k,icomp+n)) * mskv(i,j,0);
95 Real grad_hi_jp1 = (calc_arr(dom_hi.x ,j+1,k,icomp+n) - calc_arr(dom_hi.x ,j ,k,icomp+n)) * mskv(i,j,0);
96 Real dTdt = calc_arr(dom_hi.x,j,k,icomp+n) - dest_arr(dom_hi.x ,j,k,icomp+n);
97 Real dTdx = dest_arr(dom_hi.x,j,k,icomp+n) - dest_arr(dom_hi.x-1,j,k,icomp+n);
98 if (dTdt * dTdx < 0.0_rt) dTdt = 0.0_rt;
99 Real dTde = (dTdt * (grad_hi + grad_hi_jp1) > 0.0_rt) ? grad_hi : grad_hi_jp1;
100 Real cff = std::max(dTdx*dTdx + dTde*dTde,eps);
101 Real Cx = dTdt * dTdx;
102 dest_arr(i,j,k,icomp+n) = (cff * calc_arr(dom_hi.x+1,j,k,icomp+n) + Cx * dest_arr(dom_hi.x,j,k,icomp+n)) * mskr(i,j,0) / (cff+Cx);
108 if (!is_periodic_in_y)
110 Box bx_ylo(bx); bx_ylo.setBig (1,dom_lo.y-1);
111 bx_ylo.setSmall(0,std::max(valid_bx.smallEnd(0)-1,dom_lo.x)); bx_ylo.setBig(0,std::min(valid_bx.bigEnd(0)+1,dom_hi.x));
112 Box bx_yhi(bx); bx_yhi.setSmall(1,dom_hi.y+1);
113 bx_yhi.setSmall(0,std::max(valid_bx.smallEnd(0)-1,dom_lo.x)); bx_yhi.setBig(0,std::min(valid_bx.bigEnd(0)+1,dom_hi.x));
115 bx_ylo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
117 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][1] * mskr(i,j,0);
119 Real grad_lo = (calc_arr(i ,dom_lo.y, k,icomp+n) - calc_arr(i-1,dom_lo.y ,k,icomp+n)) * msku(i,j,0);
120 Real grad_lo_ip1 = (calc_arr(i+1,dom_lo.y ,k,icomp+n) - calc_arr(i ,dom_lo.y ,k,icomp+n)) * msku(i,j,0);
121 Real dTdt = calc_arr(i,dom_lo.y,k,icomp+n) - dest_arr(i,dom_lo.y ,k,icomp+n);
122 Real dTde = dest_arr(i,dom_lo.y,k,icomp+n) - dest_arr(i,dom_lo.y+1,k,icomp+n);
123 if (dTdt * dTde < 0.0_rt) dTdt = 0.0_rt;
124 Real dTdx = (dTdt * (grad_lo + grad_lo_ip1) > 0.0_rt) ? grad_lo : grad_lo_ip1;
125 Real cff = std::max(dTdx*dTdx + dTde*dTde, eps);
127 dest_arr(i,j,k,icomp+n) = (cff * calc_arr(i,dom_lo.y-1,k,icomp+n) + Ce * dest_arr(i,dom_lo.y,k,icomp+n)) * mskr(i,j,0) / (cff+Ce);
130 bx_yhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
132 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][4] * mskr(i,j,0);
134 Real grad_hi = (calc_arr(i ,dom_hi.y ,k,icomp+n) - calc_arr(i-1,dom_hi.y ,k,icomp+n)) * msku(i,j,0);
135 Real grad_hi_ip1 = (calc_arr(i+1,dom_hi.y ,k,icomp+n) - calc_arr(i ,dom_hi.y ,k,icomp+n)) * msku(i,j,0);
136 Real dTdt = calc_arr(i,dom_hi.y,k,icomp+n) - dest_arr(i,dom_hi.y ,k,icomp+n);
137 Real dTde = dest_arr(i,dom_hi.y,k,icomp+n) - dest_arr(i,dom_hi.y-1,k,icomp+n);
138 if (dTdt * dTde < 0.0_rt) dTdt = 0.0_rt;
139 Real dTdx = (dTdt * (grad_hi + grad_hi_ip1) > 0.0_rt) ? grad_hi : grad_hi_ip1;
140 Real cff = std::max(dTdx*dTdx + dTde*dTde, eps);
142 dest_arr(i,j,k,icomp+n) = (cff*calc_arr(i,dom_hi.y+1,k,icomp+n) + Ce*dest_arr(i,dom_hi.y,k,icomp+n)) * mskr(i,j,0) / (cff+Ce);
149 Box bx_zlo(bx); bx_zlo.setBig (2,dom_lo.z-1);
150 Box bx_zhi(bx); bx_zhi.setSmall(2,dom_hi.z+1);
152 bx_zlo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
154 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][2] * mskr(i,j,0);
157 bx_zhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
159 dest_arr(i,j,k,icomp+n) = l_bc_extdir_vals_d[n][5] * mskr(i,j,0);
165 Box bx_xlo(bx); bx_xlo.setBig (0,dom_lo.x-1-n_not_fill);
166 bx_xlo.setSmall(2,std::max(dom_lo.z,bx.smallEnd(2)));
167 bx_xlo.setBig (2,std::min(dom_hi.z,bx.bigEnd(2)));
168 Box bx_xhi(bx); bx_xhi.setSmall(0,dom_hi.x+1+n_not_fill);
169 bx_xhi.setSmall(2,std::max(dom_lo.z,bx.smallEnd(2)));
170 bx_xhi.setBig (2,std::min(dom_hi.z,bx.bigEnd(2)));
171 Box bx_ylo(bx); bx_ylo.setBig (1,dom_lo.y-1-n_not_fill);
172 bx_ylo.setSmall(2,std::max(dom_lo.z,bx.smallEnd(2)));
173 bx_ylo.setBig (2,std::min(dom_hi.z,bx.bigEnd(2)));
174 Box bx_yhi(bx); bx_yhi.setSmall(1,dom_hi.y+1+n_not_fill);
175 bx_yhi.setSmall(2,std::max(dom_lo.z,bx.smallEnd(2)));
176 bx_yhi.setBig (2,std::min(dom_hi.z,bx.bigEnd(2)));
178 Box xlo_ylo = bx_xlo & bx_ylo;
179 Box xhi_ylo = bx_xhi & bx_ylo;
180 Box xlo_yhi = bx_xlo & bx_yhi;
181 Box xhi_yhi = bx_xhi & bx_yhi;
191 ParallelFor(bx_xlo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
192 int iflip = dom_lo.x - 1 - i;
196 dest_arr(i,j,k,icomp+n) = dest_arr(dom_lo.x-n_not_fill-inner,j,k,icomp+n);
198 dest_arr(i,j,k,icomp+n) = dest_arr(iflip,j,k,icomp+n);
200 dest_arr(i,j,k,icomp+n) = -dest_arr(iflip,j,k,icomp+n);
203 bx_xhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
204 int iflip = 2*dom_hi.x + 1 - i;
208 dest_arr(i,j,k,icomp+n) = dest_arr(dom_hi.x+n_not_fill+inner,j,k,icomp+n);
210 dest_arr(i,j,k,icomp+n) = dest_arr(iflip,j,k,icomp+n);
212 dest_arr(i,j,k,icomp+n) = -dest_arr(iflip,j,k,icomp+n);
222 bx_ylo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
223 int jflip = dom_lo.y - 1 - j;
227 dest_arr(i,j,k,icomp+n) = dest_arr(i,dom_lo.y-n_not_fill-inner,k,icomp+n);
229 dest_arr(i,j,k,icomp+n) = dest_arr(i,jflip,k,icomp+n);
231 dest_arr(i,j,k,icomp+n) = -dest_arr(i,jflip,k,icomp+n);
234 bx_yhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n) {
235 int jflip = 2*dom_hi.y + 1 - j;
239 dest_arr(i,j,k,icomp+n) = dest_arr(i,dom_hi.y+n_not_fill+inner,k,icomp+n);
241 dest_arr(i,j,k,icomp+n) = dest_arr(i,jflip,k,icomp+n);
243 dest_arr(i,j,k,icomp+n) = -dest_arr(i,jflip,k,icomp+n);
250 Box bx_zlo(bx); bx_zlo.setBig (2,std::max(dom_lo.z-1,bx.smallEnd(2)));
251 Box bx_zhi(bx); bx_zhi.setSmall(2,std::min(dom_hi.z+1,bx.bigEnd(2)));
255 ParallelFor(bx_zlo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
257 int kflip = dom_lo.z - 1 - i;
259 dest_arr(i,j,k,icomp+n) = dest_arr(i,j,dom_lo.z,icomp+n);
261 dest_arr(i,j,k,icomp+n) = dest_arr(i,j,kflip,icomp+n);
263 dest_arr(i,j,k,icomp+n) = -dest_arr(i,j,kflip,icomp+n);
269 ParallelFor(bx_zhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
271 int kflip = 2*dom_hi.z + 1 - i;
273 dest_arr(i,j,k,icomp+n) = dest_arr(i,j,dom_hi.z,icomp+n);
275 dest_arr(i,j,k,icomp+n) = dest_arr(i,j,kflip,icomp+n);
277 dest_arr(i,j,k,icomp+n) = -dest_arr(i,j,kflip,icomp+n);
284 if (!xlo_ylo.isEmpty()) {
285 ParallelFor(xlo_ylo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
291 dest_arr(i,j,k,icomp+n) = 0.5 * (dest_arr(i,dom_lo.y,k,icomp+n)
292 + dest_arr(dom_lo.x,j,k,icomp+n));
296 if (!xlo_yhi.isEmpty()) {
297 ParallelFor(xlo_yhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
303 dest_arr(i,j,k,icomp+n) = 0.5 * (dest_arr(i,dom_hi.y,k,icomp+n)
304 + dest_arr(dom_lo.x,j,k,icomp+n));
308 if (!xhi_ylo.isEmpty()) {
309 ParallelFor(xhi_ylo & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
315 dest_arr(i,j,k,icomp+n) = 0.5 * (dest_arr(i,dom_lo.y,k,icomp+n)
316 + dest_arr(dom_hi.x,j,k,icomp+n));
320 if (!xhi_yhi.isEmpty()) {
321 ParallelFor(xhi_yhi & dest_arr_box, ncomp, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k,
int n)
327 dest_arr(i,j,k,icomp+n) = 0.5 * (dest_arr(i,dom_hi.y,k,icomp+n)
328 + dest_arr(dom_hi.x,j,k,icomp+n));
334 Gpu::streamSynchronize();