Skip to content

Commit de7b7f4

Browse files
WeiqunZhangcgilet
andauthored
Fix Tensor Solver BC (AMReX-Codes#2930)
This fixes some bugs in the physical domain BC of tensor linear solver. At the corner of two no-slip walls (e.g., (0,0)), we have u(-1,0) = -u(0,0) and u(0,-1) = -u(0,0). It's incorrect to fill the corner ghost cell with u(-1,-1) = u(-1,0) + u(0,-1) - u(0,0), because it will result in u(-1,-1) = -3 * u(0,0). In the old approach, to avoid branches in computing transverse derivatives on cell faces, we fill the ghost cells first. For example, to compute du/dy at the lo-x boundary, we use the data in i = -1 and 0, just like we compute du/dy(i) using u(i-1) and u(i) for interior faces. The problem is the normal velocity in the ghost cells outside a wall is filled with extrapolation of the Dirichlet value (which is zero) and more than 1 interior cells. Because of the high-order extrapolation, u(-1) != -u(0). This is the desired approach for computing du/dx on the wall. However, this produces incorrect results in dudy. In the new approach, we explicitly handle the boundaries in the derivative stencil. For example, to compute transverse derivatives on an inflow face, we use the boundary values directly. Co-authored-by: cgilet <[email protected]>
1 parent 13aa4df commit de7b7f4

12 files changed

+3487
-1359
lines changed

Src/Base/AMReX_Orientation.H

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ public:
7575
* according to the above ordering.
7676
*/
7777
AMREX_GPU_HOST_DEVICE
78-
operator int () const noexcept { return val; }
78+
constexpr operator int () const noexcept { return val; }
7979
//! Return opposite orientation.
8080
AMREX_GPU_HOST_DEVICE
8181
Orientation flip () const noexcept
@@ -97,6 +97,30 @@ public:
9797
//! Read from an istream.
9898
friend std::istream& operator>> (std::istream& os, Orientation& o);
9999

100+
//! Int value of the x-lo-face
101+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
102+
static constexpr int xlo () noexcept { return 0; }
103+
104+
//! Int value of the x-hi-face
105+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
106+
static constexpr int xhi () noexcept { return AMREX_SPACEDIM; }
107+
108+
//! Int value of the y-lo-face
109+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
110+
static constexpr int ylo () noexcept { return 1; }
111+
112+
//! Int value of the y-hi-face
113+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
114+
static constexpr int yhi () noexcept { return 1+AMREX_SPACEDIM; }
115+
116+
//! Int value of the z-lo-face
117+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
118+
static constexpr int zlo () noexcept { return 2; }
119+
120+
//! Int value of the z-hi-face
121+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
122+
static constexpr int zhi () noexcept { return 2+AMREX_SPACEDIM; }
123+
100124
private:
101125
//! Used internally.
102126
AMREX_GPU_HOST_DEVICE

Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ public: // for cuda
105105

106106
void applyBCTensor (int amrlev, int mglev, MultiFab& vel,
107107
BCMode bc_mode, StateMode s_mode, const MLMGBndry* bndry) const;
108-
void compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const;
108+
void compCrossTerms(int amrlev, int mglev, MultiFab const& mf,
109+
const MLMGBndry* bndry) const;
109110
};
110111

111112
}

Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp

Lines changed: 151 additions & 149 deletions
Large diffs are not rendered by default.

Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
1313
const auto& bcondloc = *m_bcondloc[amrlev][mglev];
1414
const auto& maskvals = m_maskvals[amrlev][mglev];
1515

16-
FArrayBox foofab(Box::TheUnitBox(),3);
17-
const auto& foo = foofab.array();
16+
Array4<Real const> foo;
1817

1918
const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray();
2019
const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1);
20+
const auto dlo = amrex::lbound(domain);
21+
const auto dhi = amrex::ubound(domain);
2122

2223
auto factory = dynamic_cast<EBFArrayBoxFactory const*>(m_factory[amrlev][mglev].get());
2324
const FabArray<EBCellFlagFab>* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr;
@@ -39,14 +40,13 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
3940
const auto & bdlv = bcondloc.bndryLocs(mfi);
4041
const auto & bdcv = bcondloc.bndryConds(mfi);
4142

42-
GpuArray<BoundCond,2*AMREX_SPACEDIM*AMREX_SPACEDIM> bct;
43-
GpuArray<Real,2*AMREX_SPACEDIM*AMREX_SPACEDIM> bcl;
44-
for (OrientationIter face; face; ++face) {
45-
Orientation ori = face();
46-
const int iface = ori;
47-
for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) {
48-
bct[iface*AMREX_SPACEDIM+icomp] = bdcv[icomp][ori];
49-
bcl[iface*AMREX_SPACEDIM+icomp] = bdlv[icomp][ori];
43+
Array2D<BoundCond,0,2*AMREX_SPACEDIM,0,AMREX_SPACEDIM> bct;
44+
Array2D<Real,0,2*AMREX_SPACEDIM,0,AMREX_SPACEDIM> bcl;
45+
for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) {
46+
for (OrientationIter face; face; ++face) {
47+
Orientation ori = face();
48+
bct(ori,icomp) = bdcv[icomp][ori];
49+
bcl(ori,icomp) = bdlv[icomp][ori];
5050
}
5151
}
5252

@@ -72,7 +72,7 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
7272
mxlo, mylo, mxhi, myhi,
7373
bvxlo, bvylo, bvxhi, bvyhi,
7474
bct, bcl, inhomog, imaxorder,
75-
dxinv, domain);
75+
dxinv, dlo, dhi);
7676
});
7777
#else
7878
const auto& mzlo = maskvals[Orientation(2,Orientation::low )].array(mfi);
@@ -83,28 +83,50 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
8383
const auto& bvzhi = (bndry != nullptr) ?
8484
(*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo;
8585

86-
AMREX_HOST_DEVICE_FOR_1D ( 12, iedge,
86+
#ifdef AMREX_USE_GPU
87+
if (Gpu::inLaunchRegion()) {
88+
amrex::launch(12, 64, Gpu::gpuStream(),
89+
#ifdef AMREX_USE_DPCPP
90+
[=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item)
91+
{
92+
int bid = item.get_group_linear_id();
93+
int tid = item.get_local_linear_id();
94+
int bdim = item.get_local_range(0);
95+
#else
96+
[=] AMREX_GPU_DEVICE ()
97+
{
98+
int bid = blockIdx.x;
99+
int tid = threadIdx.x;
100+
int bdim = blockDim.x;
101+
#endif
102+
mltensor_fill_edges(bid, tid, bdim, vbx, velfab,
103+
mxlo, mylo, mzlo, mxhi, myhi, mzhi,
104+
bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
105+
bct, bcl, inhomog, imaxorder,
106+
dxinv, dlo, dhi);
107+
});
108+
} else
109+
#endif
87110
{
88-
mltensor_fill_edges(iedge, vbx, velfab,
111+
mltensor_fill_edges(vbx, velfab,
89112
mxlo, mylo, mzlo, mxhi, myhi, mzhi,
90113
bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
91114
bct, bcl, inhomog, imaxorder,
92-
dxinv, domain);
93-
});
115+
dxinv, dlo, dhi);
116+
}
94117

95118
AMREX_HOST_DEVICE_FOR_1D ( 8, icorner,
96119
{
97120
mltensor_fill_corners(icorner, vbx, velfab,
98121
mxlo, mylo, mzlo, mxhi, myhi, mzhi,
99122
bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
100123
bct, bcl, inhomog, imaxorder,
101-
dxinv, domain);
124+
dxinv, dlo, dhi);
102125
});
126+
103127
#endif
104128
}
105129
}
106-
107-
// Notet that it is incorrect to call EnforcePeriodicity on vel.
108130
}
109131

110132
}

Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H

Lines changed: 120 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,95 @@
66

77
namespace amrex {
88

9-
namespace {
10-
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
11-
Real mlebtensor_weight (int d) {
12-
return (d==2) ? 0.5 : ((d==1) ? 1.0 : 0.0);
9+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
10+
void mlebtensor_cross_terms_fx (Box const& box, Array4<Real> const& fx,
11+
Array4<Real const> const& vel,
12+
Array4<Real const> const& etax,
13+
Array4<Real const> const& kapx,
14+
Array4<Real const> const& apx,
15+
Array4<EBCellFlag const> const& flag,
16+
GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
17+
{
18+
const Real dyi = dxinv[1];
19+
const auto lo = amrex::lbound(box);
20+
const auto hi = amrex::ubound(box);
21+
constexpr Real twoThirds = 2./3.;
22+
23+
int k = 0;
24+
for (int j = lo.y; j <= hi.y; ++j) {
25+
AMREX_PRAGMA_SIMD
26+
for (int i = lo.x; i <= hi.x; ++i) {
27+
if (apx(i,j,0) == 0.0)
28+
{
29+
fx(i,j,0,0) = 0.0;
30+
fx(i,j,0,1) = 0.0;
31+
}
32+
else
33+
{
34+
int jhip = j + flag(i ,j,0).isConnected(0, 1,0);
35+
int jhim = j - flag(i ,j,0).isConnected(0,-1,0);
36+
int jlop = j + flag(i-1,j,0).isConnected(0, 1,0);
37+
int jlom = j - flag(i-1,j,0).isConnected(0,-1,0);
38+
Real whi = mlebtensor_weight(jhip-jhim);
39+
Real wlo = mlebtensor_weight(jlop-jlom);
40+
Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi,
41+
whi,wlo,jhip,jhim,jlop,jlom);
42+
Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi,
43+
whi,wlo,jhip,jhim,jlop,jlom);
44+
Real divu = dvdy;
45+
Real xif = kapx(i,j,0);
46+
Real mun = Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta
47+
Real mut = etax(i,j,0,1);
48+
fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu;
49+
fx(i,j,0,1) = -mut*dudy;
50+
}
51+
}
52+
}
53+
}
54+
55+
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
56+
void mlebtensor_cross_terms_fy (Box const& box, Array4<Real> const& fy,
57+
Array4<Real const> const& vel,
58+
Array4<Real const> const& etay,
59+
Array4<Real const> const& kapy,
60+
Array4<Real const> const& apy,
61+
Array4<EBCellFlag const> const& flag,
62+
GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
63+
{
64+
const Real dxi = dxinv[0];
65+
const auto lo = amrex::lbound(box);
66+
const auto hi = amrex::ubound(box);
67+
constexpr Real twoThirds = 2./3.;
68+
69+
int k = 0;
70+
for (int j = lo.y; j <= hi.y; ++j) {
71+
AMREX_PRAGMA_SIMD
72+
for (int i = lo.x; i <= hi.x; ++i) {
73+
if (apy(i,j,0) == 0.0)
74+
{
75+
fy(i,j,0,0) = 0.0;
76+
fy(i,j,0,1) = 0.0;
77+
}
78+
else
79+
{
80+
int ihip = i + flag(i,j ,0).isConnected( 1,0,0);
81+
int ihim = i - flag(i,j ,0).isConnected(-1,0,0);
82+
int ilop = i + flag(i,j-1,0).isConnected( 1,0,0);
83+
int ilom = i - flag(i,j-1,0).isConnected(-1,0,0);
84+
Real whi = mlebtensor_weight(ihip-ihim);
85+
Real wlo = mlebtensor_weight(ilop-ilom);
86+
Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi,
87+
whi,wlo,ihip,ihim,ilop,ilom);
88+
Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi,
89+
whi,wlo,ihip,ihim,ilop,ilom);
90+
Real divu = dudx;
91+
Real xif = kapy(i,j,0);
92+
Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta
93+
Real mut = etay(i,j,0,0);
94+
fy(i,j,0,0) = -mut*dvdx;
95+
fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu;
96+
}
97+
}
1398
}
1499
}
15100

@@ -20,13 +105,20 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4<Real> const& fx,
20105
Array4<Real const> const& kapx,
21106
Array4<Real const> const& apx,
22107
Array4<EBCellFlag const> const& flag,
23-
GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
108+
GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
109+
Array4<Real const> const& bvxlo,
110+
Array4<Real const> const& bvxhi,
111+
Array2D<BoundCond,
112+
0,2*AMREX_SPACEDIM,
113+
0,AMREX_SPACEDIM> const& bct,
114+
Dim3 const& dlo, Dim3 const& dhi) noexcept
24115
{
25116
const Real dyi = dxinv[1];
26117
const auto lo = amrex::lbound(box);
27118
const auto hi = amrex::ubound(box);
28119
constexpr Real twoThirds = 2./3.;
29120

121+
int k = 0;
30122
for (int j = lo.y; j <= hi.y; ++j) {
31123
AMREX_PRAGMA_SIMD
32124
for (int i = lo.x; i <= hi.x; ++i) {
@@ -43,13 +135,15 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4<Real> const& fx,
43135
int jlom = j - flag(i-1,j,0).isConnected(0,-1,0);
44136
Real whi = mlebtensor_weight(jhip-jhim);
45137
Real wlo = mlebtensor_weight(jlop-jlom);
46-
Real dudy = (0.5*dyi) * ((vel(i ,jhip,0,0)-vel(i ,jhim,0,0))*whi
47-
+(vel(i-1,jlop,0,0)-vel(i-1,jlom,0,0))*wlo);
48-
Real dvdy = (0.5*dyi) * ((vel(i ,jhip,0,1)-vel(i ,jhim,0,1))*whi
49-
+(vel(i-1,jlop,0,1)-vel(i-1,jlom,0,1))*wlo);
138+
Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi,
139+
bvxlo,bvxhi,bct,dlo,dhi,
140+
whi,wlo,jhip,jhim,jlop,jlom);
141+
Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi,
142+
bvxlo,bvxhi,bct,dlo,dhi,
143+
whi,wlo,jhip,jhim,jlop,jlom);
50144
Real divu = dvdy;
51145
Real xif = kapx(i,j,0);
52-
Real mun = 0.75*(etax(i,j,0,0)-xif); // restore the original eta
146+
Real mun = Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta
53147
Real mut = etax(i,j,0,1);
54148
fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu;
55149
fx(i,j,0,1) = -mut*dudy;
@@ -65,13 +159,20 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4<Real> const& fy,
65159
Array4<Real const> const& kapy,
66160
Array4<Real const> const& apy,
67161
Array4<EBCellFlag const> const& flag,
68-
GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
162+
GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
163+
Array4<Real const> const& bvylo,
164+
Array4<Real const> const& bvyhi,
165+
Array2D<BoundCond,
166+
0,2*AMREX_SPACEDIM,
167+
0,AMREX_SPACEDIM> const& bct,
168+
Dim3 const& dlo, Dim3 const& dhi) noexcept
69169
{
70170
const Real dxi = dxinv[0];
71171
const auto lo = amrex::lbound(box);
72172
const auto hi = amrex::ubound(box);
73173
constexpr Real twoThirds = 2./3.;
74174

175+
int k = 0;
75176
for (int j = lo.y; j <= hi.y; ++j) {
76177
AMREX_PRAGMA_SIMD
77178
for (int i = lo.x; i <= hi.x; ++i) {
@@ -88,15 +189,16 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4<Real> const& fy,
88189
int ilom = i - flag(i,j-1,0).isConnected(-1,0,0);
89190
Real whi = mlebtensor_weight(ihip-ihim);
90191
Real wlo = mlebtensor_weight(ilop-ilom);
91-
Real dudx = (0.5*dxi) * ((vel(ihip,j ,0,0)-vel(ihim,j ,0,0))*whi
92-
+(vel(ilop,j-1,0,0)-vel(ilom,j-1,0,0))*wlo);
93-
Real dvdx = (0.5*dxi) * ((vel(ihip,j ,0,1)-vel(ihim,j ,0,1))*whi
94-
+(vel(ilop,j-1,0,1)-vel(ilom,j-1,0,1))*wlo);
95-
192+
Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi,
193+
bvylo,bvyhi,bct,dlo,dhi,
194+
whi,wlo,ihip,ihim,ilop,ilom);
195+
Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi,
196+
bvylo,bvyhi,bct,dlo,dhi,
197+
whi,wlo,ihip,ihim,ilop,ilom);
96198
Real divu = dudx;
97199
Real xif = kapy(i,j,0);
98-
Real mun = 0.75*(etay(i,j,0,1)-xif); // restore the original eta
99-
Real mut = etay(i,j,0,0);
200+
Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta
201+
Real mut = etay(i,j,0,0);
100202
fy(i,j,0,0) = -mut*dvdx;
101203
fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu;
102204
}

0 commit comments

Comments
 (0)