Hi, the latest Trixi.jl and TrixiCUDA.jl are working! Thanks for your work! However, I get the following error when I run the test for the problem below.
using Trixi, TrixiCUDA
using CUDA
# Scalar indexing on GPU arrays is currently required for the tests to pass.
# Remove this once the upstream indexing issues are resolved.
CUDA.allowscalar(true)

# 1D shallow water equations; keyword order is irrelevant for construction.
equations = ShallowWaterEquations1D(H0 = 1.75, gravity_constant = 9.812)
"""
    initial_condition_stone_throw_discontinuous_bottom(x, t,
                                                       equations::ShallowWaterEquations1D)

Initial condition with a flat lake surface at `equations.H0`, a discontinuous
piecewise-constant velocity field, and a discontinuous bottom topography.
Returns the conservative variables via `prim2cons`.
"""
function initial_condition_stone_throw_discontinuous_bottom(x, t,
                                                            equations::ShallowWaterEquations1D)
    x1 = x[1]

    # Flat lake: the water surface sits at the reference level H0 everywhere
    H = equations.H0

    # Discontinuous velocity: inward flow on [-0.75, 0.75], at rest outside.
    # The first branch wins at x1 == 0.0, matching the original if/elseif order.
    v = if -0.75 <= x1 <= 0.0
        -1.0
    elseif 0.0 <= x1 <= 0.75
        1.0
    else
        0.0
    end

    # Smooth two-bump bottom topography (same arithmetic form as the original
    # so the floating-point result is bit-identical)
    b = (1.5 / exp(0.5 * ((x1 - 1.0)^2)) +
         0.75 / exp(0.5 * ((x1 + 1.0)^2)))

    # Overwrite the left part of the domain to force a discontinuous bottom
    if -1.5 <= x1 <= 0.0
        b = 0.5
    end

    return prim2cons(SVector(H, v, b), equations)
end
# Alias the discontinuous initial condition defined above.
initial_condition = initial_condition_stone_throw_discontinuous_bottom
# Reflective wall boundaries on both ends of the 1D domain.
boundary_condition = boundary_condition_slip_wall
# Entropy-conservative volume flux pair (conservative part, nonconservative part)
# for the shallow water equations.
volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
# Well-balanced surface flux: Lax-Friedrichs with Audusse et al. hydrostatic
# reconstruction, paired with the matching nonconservative flux.
surface_flux = (FluxHydrostaticReconstruction(flux_lax_friedrichs,
                                              hydrostatic_reconstruction_audusse_etal),
                flux_nonconservative_audusse_etal)
# Polynomial degree 4 basis; the GPU variant mirrors the CPU one.
basis = LobattoLegendreBasis(4)
basis_gpu = LobattoLegendreBasisGPU(4)
# Shock-capturing indicator based on water height times pressure
# (note: built with the CPU basis — presumably intentional; TODO confirm).
indicator_sc = IndicatorHennemannGassner(equations, basis,
                                         alpha_max = 0.5,
                                         alpha_min = 0.001,
                                         alpha_smooth = true,
                                         variable = waterheight_pressure)
# Blend the high-order DG flux with a first-order FV flux where the
# indicator flags troubled cells.
volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
                                                 volume_flux_dg = volume_flux,
                                                 volume_flux_fv = surface_flux)
# GPU DGSEM solver assembled from the GPU basis and the flux choices above.
solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral)
# Non-periodic 1D mesh on [-3, 3] with 2^3 = 8 initial elements.
coordinates_min = -3.0
coordinates_max = 3.0
mesh = TreeMesh(coordinates_min, coordinates_max,
                initial_refinement_level = 3,
                n_cells_max = 10_000,
                periodicity = false)
# GPU semidiscretization; slip-wall boundaries are applied on both sides.
semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                           boundary_conditions = boundary_condition)
# Time interval for the simulation (shared between CPU and GPU paths).
tspan = tspan_gpu = (0.0, 3.0)
t = t_gpu = 0.0
# Semi on GPU: unpack the components needed to call the kernels directly.
equations_gpu, mesh_gpu, solver_gpu = semi_gpu.equations, semi_gpu.mesh, semi_gpu.solver
cache_gpu, cache_cpu = semi_gpu.cache_gpu, semi_gpu.cache_cpu
boundary_conditions_gpu, source_terms_gpu = semi_gpu.boundary_conditions, semi_gpu.source_terms
# ODE on GPU: build the ODE problem, then wrap flat state vectors into the
# solver's array layout (presumably (variables, nodes, elements) — TODO confirm).
ode_gpu = semidiscretizeGPU(semi_gpu, tspan_gpu)
u_gpu_ = copy(ode_gpu.u0)
du_gpu_ = similar(u_gpu_)
u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
# This call triggers the InvalidIRError reported below: compiling
# boundary_flux_kernel! fails with "unsupported dynamic function invocation
# (call to iterate)" for the nonconservative slip-wall boundary flux.
TrixiCUDA.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu,
                              Trixi.have_nonconservative_terms(equations_gpu),
                              equations_gpu, solver_gpu, cache_gpu)
(base) jovyan@workspace-0 ~/f/D/TrixiCUDA.jl> julia debug_shallow.jl
┌ Warning: It's not recommended to use allowscalar([true]) to allow scalar indexing.
│ Instead, use `allowscalar() do end` or `@allowscalar` to denote exactly which operations can use scalar operations.
└ @ GPUArraysCore ~/.julia/packages/GPUArraysCore/aNaXo/src/GPUArraysCore.jl:184
ERROR: LoadError: InvalidIRError: compiling MethodInstance for TrixiCUDA.boundary_flux_kernel!(::CuDeviceArray{Float64, 3, 1}, ::CuDeviceArray{Float64, 3, 1}, ::CuDeviceMatrix{Float64, 1}, ::Float64, ::CuDeviceVector{Int64, 1}, ::CuDeviceVector{Int64, 1}, ::CuDeviceVector{Int64, 1}, ::CuDeviceVector{Int64, 1}, ::CuDeviceVector{Int64, 1}, ::@NamedTuple{x_neg::typeof(boundary_condition_slip_wall), x_pos::typeof(boundary_condition_slip_wall)}, ::ShallowWaterEquations1D{Float64}, ::FluxHydrostaticReconstruction{FluxLaxFriedrichs{typeof(max_abs_speed_naive)}, typeof(hydrostatic_reconstruction_audusse_etal)}, ::typeof(flux_nonconservative_audusse_etal)) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to iterate)
Stacktrace:
[1] indexed_iterate
@ ./tuple.jl:162
[2] boundary_flux_kernel!
@ ~/DGCFN.jl/TrixiCUDA.jl/src/solvers/dg_1d_kernel.jl:0
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
[1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module)
@ GPUCompiler ~/.julia/packages/GPUCompiler/OGnEB/src/validation.jl:167
[2] macro expansion
@ ~/.julia/packages/GPUCompiler/OGnEB/src/driver.jl:381 [inlined]
[3] emit_llvm(job::GPUCompiler.CompilerJob; toplevel::Bool, libraries::Bool, optimize::Bool, cleanup::Bool, validate::Bool, only_entry::Bool)
@ GPUCompiler ~/.julia/packages/GPUCompiler/OGnEB/src/utils.jl:110
[4] emit_llvm
@ ~/.julia/packages/GPUCompiler/OGnEB/src/utils.jl:108 [inlined]
[5] codegen(output::Symbol, job::GPUCompiler.CompilerJob; toplevel::Bool, libraries::Bool, optimize::Bool, cleanup::Bool, validate::Bool, strip::Bool, only_entry::Bool, parent_job::Nothing)
@ GPUCompiler ~/.julia/packages/GPUCompiler/OGnEB/src/driver.jl:100
[6] codegen
@ ~/.julia/packages/GPUCompiler/OGnEB/src/driver.jl:82 [inlined]
[7] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/OGnEB/src/driver.jl:79
[8] compile
@ ~/.julia/packages/GPUCompiler/OGnEB/src/driver.jl:74 [inlined]
[9] #1171
@ ~/.julia/packages/CUDA/sWPBr/src/compiler/compilation.jl:255 [inlined]
[10] JuliaContext(f::CUDA.var"#1171#1174"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/OGnEB/src/driver.jl:34
[11] JuliaContext(f::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/OGnEB/src/driver.jl:25
[12] compile(job::GPUCompiler.CompilerJob)
@ CUDA ~/.julia/packages/CUDA/sWPBr/src/compiler/compilation.jl:254
[13] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/OGnEB/src/execution.jl:237
[14] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/OGnEB/src/execution.jl:151
[15] macro expansion
@ ~/.julia/packages/CUDA/sWPBr/src/compiler/execution.jl:373 [inlined]
[16] macro expansion
@ ./lock.jl:273 [inlined]
[17] cufunction(f::typeof(TrixiCUDA.boundary_flux_kernel!), tt::Type{Tuple{CuDeviceArray{Float64, 3, 1}, CuDeviceArray{Float64, 3, 1}, CuDeviceMatrix{Float64, 1}, Float64, CuDeviceVector{Int64, 1}, CuDeviceVector{Int64, 1}, CuDeviceVector{Int64, 1}, CuDeviceVector{Int64, 1}, CuDeviceVector{Int64, 1}, @NamedTuple{x_neg::typeof(boundary_condition_slip_wall), x_pos::typeof(boundary_condition_slip_wall)}, ShallowWaterEquations1D{Float64}, FluxHydrostaticReconstruction{FluxLaxFriedrichs{typeof(max_abs_speed_naive)}, typeof(hydrostatic_reconstruction_audusse_etal)}, typeof(flux_nonconservative_audusse_etal)}}; kwargs::@Kwargs{})
@ CUDA ~/.julia/packages/CUDA/sWPBr/src/compiler/execution.jl:368
[18] cufunction(f::typeof(TrixiCUDA.boundary_flux_kernel!), tt::Type{Tuple{CuDeviceArray{Float64, 3, 1}, CuDeviceArray{Float64, 3, 1}, CuDeviceMatrix{Float64, 1}, Float64, CuDeviceVector{Int64, 1}, CuDeviceVector{Int64, 1}, CuDeviceVector{Int64, 1}, CuDeviceVector{Int64, 1}, CuDeviceVector{Int64, 1}, @NamedTuple{x_neg::typeof(boundary_condition_slip_wall), x_pos::typeof(boundary_condition_slip_wall)}, ShallowWaterEquations1D{Float64}, FluxHydrostaticReconstruction{FluxLaxFriedrichs{typeof(max_abs_speed_naive)}, typeof(hydrostatic_reconstruction_audusse_etal)}, typeof(flux_nonconservative_audusse_etal)}})
@ CUDA ~/.julia/packages/CUDA/sWPBr/src/compiler/execution.jl:365
[19] macro expansion
@ ~/.julia/packages/CUDA/sWPBr/src/compiler/execution.jl:112 [inlined]
[20] cuda_boundary_flux!(t::Float64, mesh::TreeMesh{1, Trixi.SerialTree{1, Float64}, Float64}, boundary_conditions::@NamedTuple{x_neg::typeof(boundary_condition_slip_wall), x_pos::typeof(boundary_condition_slip_wall)}, nonconservative_terms::Static.True, equations::ShallowWaterEquations1D{Float64}, dg::DGSEM{LobattoLegendreBasis{Float64, 5, CuArray{Float64, 1, CUDA.DeviceMemory}, Matrix{Float64}, Matrix{Float64}, CuArray{Float64, 2, CUDA.DeviceMemory}}, Trixi.LobattoLegendreMortarL2{Float64, 5, CuArray{Float64, 2, CUDA.DeviceMemory}, CuArray{Float64, 2, CUDA.DeviceMemory}}, SurfaceIntegralWeakForm{Tuple{FluxHydrostaticReconstruction{FluxLaxFriedrichs{typeof(max_abs_speed_naive)}, typeof(hydrostatic_reconstruction_audusse_etal)}, typeof(flux_nonconservative_audusse_etal)}}, VolumeIntegralShockCapturingHG{Tuple{typeof(flux_wintermeyer_etal), typeof(flux_nonconservative_wintermeyer_etal)}, Tuple{FluxHydrostaticReconstruction{FluxLaxFriedrichs{typeof(max_abs_speed_naive)}, typeof(hydrostatic_reconstruction_audusse_etal)}, typeof(flux_nonconservative_audusse_etal)}, IndicatorHennemannGassner{Float64, typeof(waterheight_pressure), @NamedTuple{alpha::Vector{Float64}, alpha_tmp::Vector{Float64}, indicator_threaded::Vector{Vector{Float64}}, modal_threaded::Vector{Vector{Float64}}}}}}, cache::@NamedTuple{elements::TrixiCUDA.ElementContainerGPU1D{Float64, Float64}, interfaces::TrixiCUDA.InterfaceContainerGPU1D{Float64}, boundaries::TrixiCUDA.BoundaryContainerGPU1D{Float64, Float64}, fstar1_L::CuArray{Float64, 3, CUDA.DeviceMemory}, fstar1_R::CuArray{Float64, 3, CUDA.DeviceMemory}})
@ TrixiCUDA ~/DGCFN.jl/TrixiCUDA.jl/src/solvers/dg_1d.jl:451
[21] top-level scope
@ ~/DGCFN.jl/TrixiCUDA.jl/debug_shallow.jl:80
in expression starting at /home/jovyan/DGCFN.jl/TrixiCUDA.jl/debug_shallow.jl:80
Originally posted by @huiyuxie in #70
Hi, the latest Trixi.jl and TrixiCUDA.jl are working! Thanks for your work! However, I get the following error when I run the test for the problem below.
MWE
error info
Below is the `Project.toml`: