30 const Box& xbx,
const Box& ybx,
31 const Array4<Real const>& uold ,
32 const Array4<Real const>& vold,
33 const Array4<Real >& ru,
34 const Array4<Real >& rv,
35 const Array4<Real >& rufrc,
36 const Array4<Real >& rvfrc,
37 const Array4<Real const>& sustr,
38 const Array4<Real const>& svstr,
39 const Array4<Real const>& bustr,
40 const Array4<Real const>& bvstr,
41 const Array4<Real const>& Huon,
42 const Array4<Real const>& Hvom,
43 const Array4<Real const>& pm,
44 const Array4<Real const>& pn,
45 const Array4<Real const>& W ,
46 const Array4<Real >& FC,
49 BL_PROFILE(
"REMORA::rhs_uv_3d()");
50 const Box& domain = geom[lev].Domain();
51 const auto dlo = amrex::lbound(domain);
52 const auto dhi = amrex::ubound(domain);
54 GeometryData
const& geomdata = geom[0].data();
55 bool is_periodic_in_x = geomdata.isPeriodic(0);
56 bool is_periodic_in_y = geomdata.isPeriodic(1);
61 FArrayBox fab_UFx(growLo(xbx,0,1),1,amrex::The_Async_Arena()); fab_UFx.template setVal<RunOn::Device>(0.);
62 FArrayBox fab_UFe(growHi(xbx,1,1),1,amrex::The_Async_Arena()); fab_UFe.template setVal<RunOn::Device>(0.);
63 FArrayBox fab_VFe(growLo(ybx,1,1),1,amrex::The_Async_Arena()); fab_VFe.template setVal<RunOn::Device>(0.);
64 FArrayBox fab_VFx(growHi(ybx,0,1),1,amrex::The_Async_Arena()); fab_VFx.template setVal<RunOn::Device>(0.);
66 auto UFx=fab_UFx.array();
67 auto UFe=fab_UFe.array();
68 auto VFx=fab_VFx.array();
69 auto VFe=fab_VFe.array();
74 const Real Gadv = -0.25_rt;
95 ParallelFor(growLo(xbx,0,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
97 Real cff1 = uold(i,j,k,nrhs)+uold(i+1,j,k,nrhs);
99 Real uxx_i = uold(i-1,j,k,nrhs)-2.0_rt*uold(i ,j,k,nrhs)+uold(i+1,j,k,nrhs);
100 Real uxx_ip1 = uold(i ,j,k,nrhs)-2.0_rt*uold(i+1,j,k,nrhs)+uold(i+2,j,k,nrhs);
103 Real Huxx_i = Huon(i-1,j,k)-2.0_rt*Huon(i ,j,k)+Huon(i+1,j,k);
104 Real Huxx_ip1 = Huon(i ,j,k)-2.0_rt*Huon(i+1,j,k)+Huon(i+2,j,k);
106 if (i == dlo.x && !is_periodic_in_x) {
110 else if (i == dhi.x && !is_periodic_in_x) {
115 Real cff = (cff1 > 0.0_rt) ? uxx_i : uxx_ip1;
117 Real Huon_avg = (Huon(i,j,k) + Huon(i+1,j,k));
119 UFx(i,j,k) = 0.25_rt*(cff1+Gadv*cff) * ( Huon_avg + 0.5_rt*Gadv*(Huxx_i + Huxx_ip1) );
125 ParallelFor(growHi(xbx,1,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
127 Real cff1 = uold(i,j,k,nrhs) + uold(i ,j-1,k,nrhs);
128 Real cff2 = Hvom(i,j,k) + Hvom(i-1,j ,k);
130 Real uee_jm1 = uold(i,j-2,k,nrhs) - 2.0_rt*uold(i,j-1,k,nrhs) + uold(i ,j,k,nrhs);
131 Real uee_j = uold(i,j-1,k,nrhs) - 2.0_rt*uold(i,j ,k,nrhs) + uold(i,j+1,k,nrhs);
133 if (j == dlo.y and !is_periodic_in_y) {
135 }
else if (j == dhi.y+1 and !is_periodic_in_y) {
140 Real cff = (cff2 > 0.0_rt) ? uee_jm1 : uee_j;
142 Real Hvxx_i = Hvom(i-1,j,k)-2.0_rt*Hvom(i ,j,k)+Hvom(i+1,j,k);
143 Real Hvxx_im1 = Hvom(i-2,j,k)-2.0_rt*Hvom(i-1,j,k)+Hvom(i ,j,k);
145 UFe(i,j,k) = 0.25_rt * (cff1+Gadv*cff)* (cff2+Gadv*0.5_rt*(Hvxx_i + Hvxx_im1));
148 ParallelFor(growLo(xbx,0,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
150 UFx(i,j,k) = 0.25_rt * (uold(i,j,k,nrhs) + uold(i+1,j,k,nrhs)) * (Huon(i,j,k)+Huon(i+1,j,k));
152 ParallelFor(growHi(xbx,1,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
154 UFe(i,j,k) = 0.25_rt * (uold(i,j-1,k,nrhs) + uold(i,j,k,nrhs)) * (Hvom(i-1,j,k)+Hvom(i,j,k));
161 ParallelFor(xbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
163 ru(i,j,k,nrhs) -= ( (UFx(i,j,k)-UFx(i-1,j,k)) + (UFe(i,j+1,k)-UFe(i ,j,k)) );
167 ParallelFor(surroundingNodes(xbx,2), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
172 Real cff1=9.0_rt/16.0_rt;
173 Real cff2=1.0_rt/16.0_rt;
177 FC(i,j,k)=( cff1*(uold(i ,j,k-1,nrhs)+ uold(i,j,k ,nrhs))
178 -cff2*(uold(i ,j,k-2,nrhs)+ uold(i,j,k+1,nrhs)) )*
179 ( cff1*( W(i ,j,k)+ W(i-1,j,k))
180 -cff2*( W(i+1,j,k)+ W(i-2,j,k)) );
186 FC(i,j,N)=( cff1*(uold(i ,j,N-1,nrhs)+ uold(i,j,N ,nrhs))
187 -cff2*(uold(i ,j,N-2,nrhs)+ uold(i,j,N ,nrhs)) )*
188 ( cff1*( W(i ,j,N)+ W(i-1,j,N))
189 -cff2*( W(i+1,j,N)+ W(i-2,j,N)) );
191 FC(i,j,1)=( cff1*(uold(i ,j,0,nrhs)+ uold(i,j,1,nrhs))
192 -cff2*(uold(i ,j,0,nrhs)+ uold(i,j,2,nrhs)) )*
193 ( cff1*( W(i ,j,1)+ W(i-1,j,1))
194 -cff2*( W(i+1,j,1)+ W(i-2,j,1)) );
199 ParallelFor(surroundingNodes(xbx,2), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
202 FC(i,j,k) = 0.25_rt * (uold(i,j,k-1,nrhs)+uold(i,j,k,nrhs)) * (W(i,j,k) + W(i-1,j,k));
210 ParallelFor(xbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
212 Real cff = FC(i,j,k+1)-FC(i,j,k);
214 ru(i,j,k,nrhs) -= cff;
219 AMREX_ASSERT(xbx.smallEnd(2) == 0 && xbx.bigEnd(2) == N);
220 ParallelFor(makeSlab(xbx,2,0), [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
222 for (
int k = 0; k <= N; ++k)
224 rufrc(i,j,0) += ru(i,j,k,nrhs);
226 Real om_u = 2.0_rt / (pm(i-1,j,0)+pm(i,j,0));
227 Real on_u = 2.0_rt / (pn(i-1,j,0)+pn(i,j,0));
228 Real cff = om_u * on_u;
230 Real cff1 = (k == N) ? sustr(i,j,0)*cff : 0.0_rt;
231 Real cff2 = (k == 0) ? -bustr(i,j,0)*cff : 0.0_rt;
233 rufrc(i,j,0) += cff1+cff2;
253 ParallelFor(growHi(ybx,0,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
255 Real cff1 = vold(i,j,k,nrhs) + vold(i-1,j ,k,nrhs);
256 Real cff2 = Huon(i,j,k) + Huon(i ,j-1,k);
258 Real vxx_im1 = vold(i-2,j,k,nrhs)-2.0_rt*vold(i-1,j,k,nrhs)+vold(i ,j,k,nrhs);
259 Real vxx_i = vold(i-1,j,k,nrhs)-2.0_rt*vold(i ,j,k,nrhs)+vold(i+1,j,k,nrhs);
261 if (i == dlo.x and !is_periodic_in_x) {
263 }
else if (i == dhi.x+1 and !is_periodic_in_x) {
268 Real cff = (cff2 > 0.0_rt) ? vxx_im1 : vxx_i;
271 Real Huee_j = Huon(i,j-1,k)-2.0_rt*Huon(i,j ,k)+Huon(i,j+1,k);
272 Real Huee_jm1 = Huon(i,j-2,k)-2.0_rt*Huon(i,j-1,k)+Huon(i,j ,k);
274 VFx(i,j,k) = 0.25_rt*(cff1+Gadv*cff)* (cff2+Gadv*0.5_rt*(Huee_j + Huee_jm1));
278 ParallelFor(growLo(ybx,1,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
280 Real cff1=vold(i,j,k,nrhs)+vold(i,j+1,k,nrhs);
283 Real vee_j = vold(i,j-1,k,nrhs)-2.0_rt*vold(i,j ,k,nrhs)+ vold(i,j+1,k,nrhs);
284 Real vee_jp1 = vold(i,j ,k,nrhs)-2.0_rt*vold(i,j+1,k,nrhs)+ vold(i,j+2,k,nrhs);
286 Real Hvee_j = Hvom(i,j-1,k)-2.0_rt*Hvom(i,j ,k)+Hvom(i,j+1,k);
287 Real Hvee_jp1 = Hvom(i,j ,k)-2.0_rt*Hvom(i,j+1,k)+Hvom(i,j+2,k);
289 if (j == dlo.y and !is_periodic_in_y) {
293 else if (j == dhi.y and !is_periodic_in_y) {
297 Real cff = (cff1 > 0.0_rt) ? vee_j : vee_jp1;
299 VFe(i,j,k) = 0.25_rt * (cff1+Gadv*cff) * ( Hvom(i,j ,k)+ Hvom(i,j+1,k) + 0.5_rt * Gadv * (Hvee_j + Hvee_jp1) );
302 ParallelFor(growHi(ybx,0,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
304 VFx(i,j,k) = 0.25_rt * (vold(i-1,j,k,nrhs) + vold(i,j,k,nrhs)) * (Huon(i,j-1,k)+Huon(i,j,k));
306 ParallelFor(growLo(ybx,1,1), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
308 VFe(i,j,k) = 0.25_rt * (vold(i,j,k,nrhs) + vold(i,j+1,k,nrhs)) * (Hvom(i,j,k)+Hvom(i,j+1,k));
312 ParallelFor(ybx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
314 rv(i,j,k,nrhs) -= ( (VFx(i+1,j,k)-VFx(i,j,k)) + (VFe(i,j,k)-VFe(i,j-1,k)) );
318 ParallelFor(surroundingNodes(ybx,2), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
320 Real cff1=9.0_rt/16.0_rt;
321 Real cff2=1.0_rt/16.0_rt;
325 FC(i,j,k)=( cff1*(vold(i,j,k-1,nrhs)+ vold(i,j,k ,nrhs))
326 -cff2*(vold(i,j,k-2,nrhs)+ vold(i,j,k+1,nrhs)) )*
327 ( cff1*(W(i,j ,k)+ W(i,j-1,k))
328 -cff2*(W(i,j+1,k)+ W(i,j-2,k)) );
333 FC(i,j,N)=( cff1*(vold(i,j,N-1,nrhs)+ vold(i,j,N ,nrhs))
334 -cff2*(vold(i,j,N-2,nrhs)+ vold(i,j,N ,nrhs)) )*
335 ( cff1*(W(i,j ,N)+ W(i,j-1,N))
336 -cff2*(W(i,j+1,N)+ W(i,j-2,N)) );
337 FC(i,j,1)=( cff1*(vold(i,j,0,nrhs)+ vold(i,j,1,nrhs))
338 -cff2*(vold(i,j,0,nrhs)+ vold(i,j,2,nrhs)) )*
339 ( cff1*(W(i,j ,1)+ W(i,j-1,1))
340 -cff2*(W(i,j+1,1)+ W(i,j-2,1)) );
346 ParallelFor(surroundingNodes(ybx,2), [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
349 FC(i,j,k) = 0.25_rt * (vold(i,j,k-1,nrhs)+vold(i,j,k,nrhs)) * (W(i,j,k) + W(i,j-1,k));
358 ParallelFor(ybx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
360 Real cff = FC(i,j,k+1)-FC(i,j,k);
362 rv(i,j,k,nrhs) -= cff;
367 ParallelFor(makeSlab(ybx,2,0), [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
369 for (
int k = 0; k <= N; ++k)
371 rvfrc(i,j,0) += rv(i,j,k,nrhs);
373 Real om_v = 2.0_rt / (pm(i,j-1,0)+pm(i,j,0));
374 Real on_v = 2.0_rt / (pn(i,j-1,0)+pn(i,j,0));
375 Real cff = om_v * on_v;
377 Real cff1 = (k == N) ? svstr(i,j,0)*cff : 0.0_rt;
378 Real cff2 = (k == 0) ? -bvstr(i,j,0)*cff : 0.0_rt;
380 rvfrc(i,j,0) += cff1+cff2;