Julia CUDA: LoadError: GPU broadcast resulted in non-concrete element type Any
I have to analyze some data using maximum likelihood methods, but CUDA doesn't like how I handle type instability. Any idea on how I could fix this? I tried my best in forcing concrete return types by declaring the type of every function argument, but it doesn't seem to work.
EDIT: I moved back some function declarations inside where they belonged. Here is an extract of the problematic part of the program:
function ln_likelihood( a_c::Float64,
a_p::Float64,
θ_1::Float64,
θ_2p::CuArray{Float64},
θ_2c::CuArray{Float64},
ϵ_p::CuArray{Float64},
σ_p::CuArray{Float64},
ϵ_c::CuArray{Float64},
σ_c::CuArray{Float64})
...
#return Float64
end
function trova_max_likelihood( θ_1::Float64,
θ_2p::CuArray{Float64},
θ_2c::CuArray{Float64},
ϵ_p::CuArray{Float64},
σ_p::CuArray{Float64},
ϵ_c::CuArray{Float64},
σ_c::CuArray{Float64})
...
function funzione_likelirobin(a_c::Float64, a_p::Float64)
global θ_1,θ_2p,θ_2c, ϵ_p, σ_p, ϵ_c, σ_c
ln_likelihood(a_c,a_p,θ_1,θ_2p,θ_2c, ϵ_p, σ_p, ϵ_c, σ_c)
end
funzione_likelihood(x::Tuple{Float64, Float64}) = funzione_likelirobin(x[1],x[2])
@code_warntype funzione_likelihood.(range)
#Where range::CuArray{Tuple{Float64,Float64}}
...
end
trova_max_likelihood(gθ_1, gθ_2p, gθ_2c, gϵ_p, gσ_p, gϵ_c, gσ_c)
And the output I get:
Variables
#self#::Core.Const(var"##dotfunction#274#175"{var"#funzione_likelihood#174"{var"#funzione_likelirobin#173"}}(var"#funzione_likelihood#174"{var"#funzione_likelirobin#173"}(var"#funzione_likelirobin#173"())))
x1::CuArray{Tuple{Float64, Float64}, 1, CUDA.Mem.DeviceBuffer}
Body::Union{}
1 ─ %1 = Core.getfield(#self#, :funzione_likelihood)::Core.Const(var"#funzione_likelihood#174"{var"#funzione_likelirobin#173"}(var"#funzione_likelirobin#173"()))
│ %2 = Base.broadcasted(%1, x1)::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Nothing, var"#funzione_likelihood#174"{var"#funzione_likelirobin#173"}, Tuple{CuArray{Tuple{Float64, Float64}, 1, CUDA.Mem.DeviceBuffer}}}
│ Base.materialize(%2)
└── Core.Const(:(return %3))
ERROR: LoadError: GPU broadcast resulted in non-concrete element type Any.
This probably means that the function you are broadcasting contains an error or type instability.
Stacktrace:
[1] error(s::String)
@ Base .\error.jl:33
[2] copy
@ ~\.julia\packages\GPUArrays\gkF6S\src\host\broadcast.jl:44 [inlined]
[3] materialize
@ .\broadcast.jl:883 [inlined]
[4] trova_max_likelihood(θ_1::Float64, θ_2p::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, θ_2c::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, ϵ_p::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, σ_p::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, ϵ_c::CuArray{Float64, 1, C, σ_c::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer})
@ Main ~\Documents\GitHub\lab2\Lab2\Esercizio 5\esercizio5.jl:82
[5] top-level scope
@ ~\Documents\GitHub\lab2\Lab2\Esercizio 5\esercizio5.jl:99
[6] eval
@ .\boot.jl:360 [inlined]
[7] include_string(mapexpr::typeof(identity), mod::Module, code::String, filename::String)
@ Base .\loading.jl:1094
in expression starting at C:\Users\marce\Documents\GitHub\lab2\Lab2\Esercizio 5\esercizio5.jl:99
EDIT 2: I tried switching to regular arrays and the code above wouldn't work. I had to delete a line and define:
function funzione_likelirobin(a_c::Float64, a_p::Float64)
ln_likelihood(a_c,a_p,θ_1,θ_2p,θ_2c, ϵ_p, σ_p, ϵ_c, σ_c)
end
So I made the same change in the code with CuArrays. The output I get is now:
Variables
#self#::var"##dotfunction#260#56"{var"#funzione_likelihood#55"{var"#funzione_likelirobin#54"{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}}
x1::CuArray{Tuple{Float64, Float64}, 1, CUDA.Mem.DeviceBuffer}
Body::CuArray{_A, 1, CUDA.Mem.DeviceBuffer} where _A
1 ─ %1 = Core.getfield(#self#, :funzione_likelihood)::var"#funzione_likelihood#55"{var"#funzione_likelirobin#54"{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}
│ %2 = Base.broadcasted(%1, x1)::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Nothing, var"#funzione_likelihood#55"{var"#funzione_likelirobin#54"{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, Tuple{CuArray{Tuple{Float64, Float64}, 1, CUDA.Mem.DeviceBuffer}}}
│ %3 = Base.materialize(%2)::CuArray{_A, 1, CUDA.Mem.DeviceBuffer} where _A
└── return %3
ERROR: LoadError: InvalidIRError: compiling kernel broadcast_kernel(CUDA.CuKernelContext, CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{Nothing, Tuple{Base.OneTo{Int64}}, var"#funzione_likelihood#55"{var"#funzione_likelirobin#54"{Float64, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}}}, Tuple{Base.Broadcast.Extruded{CuDeviceVector{Tuple{Float64, Float64}, 1}, Tuple{Bool}, Tuple{Int64}}}}, Int64) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to ln_likelihood)
Stacktrace:
[1] funzione_likelirobin
@ ~\Documents\GitHub\lab2\Lab2\Esercizio 5\esercizio5.jl:76
[2] funzione_likelihood
@ ~\Documents\GitHub\lab2\Lab2\Esercizio 5\esercizio5.jl:79
[3] _broadcast_getindex_evalf
@ .\broadcast.jl:648
[4] _broadcast_getindex
@ .\broadcast.jl:621
[5] getindex
@ .\broadcast.jl:575
[6] broadcast_kernel
@ ~\.julia\packages\GPUArrays\gkF6S\src\host\broadcast.jl:59
Stacktrace:
[1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{GPUArrays.var"#broadcast_kernel#17", Tuple{CUDA.CuKernelContext, CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{Nothing, Tuple{Base.OneTo{Int64}}, var"#funzione_likelihood#55"{var"#funzione_likelirobin#54"{Float64, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}}}, Tuple{Base.Broadcast.Extruded{CuDeviceVector{Tuple{Float64, Float64}, 1}, Tuple{Bool}, Tuple{Int64}}}}, Int64}}}, args::LLVM.Module)
@ GPUCompiler ~\.julia\packages\GPUCompiler\HeCT6\src\validation.jl:111
[2] macro expansion
@ ~\.julia\packages\GPUCompiler\HeCT6\src\driver.jl:326 [inlined]
[3] macro expansion
@ ~\.julia\packages\TimerOutputs\YJq3h\src\TimerOutput.jl:252 [inlined]
[4] macro expansion
@ ~\.julia\packages\GPUCompiler\HeCT6\src\driver.jl:324 [inlined]
[5] emit_asm(job::GPUCompiler.CompilerJob, ir::LLVM.Module; strip::Bool, validate::Bool, format::LLVM.API.LLVMCodeGenFileType)
@ GPUCompiler ~\.julia\packages\GPUCompiler\HeCT6\src\utils.jl:64
[6] cufunction_compile(job::GPUCompiler.CompilerJob)
@ CUDA ~\.julia\packages\CUDA\sCev8\src\compiler\execution.jl:326
[7] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
@ GPUCompiler ~\.julia\packages\GPUCompiler\HeCT6\src\cache.jl:90
[8] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{Nothing, Tuple{Base.OneTo{Int64}}, var"#funzione_likelihood#55"{var"#funzione_likelirobin#54"{Float64, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}}}, Tuple{Base.Broadcast.Extruded{CuDeviceVector{Tuple{Float64, Float64}, 1}, Tuple{Bool}, Tuple{Int64}}}}, Int64}}; name::Nothing, kwargs::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ CUDA ~\.julia\packages\CUDA\sCev8\src\compiler\execution.jl:297
[9] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{Nothing, Tuple{Base.OneTo{Int64}}, var"#funzione_likelihood#55"{var"#funzione_likelirobin#54"{Float64, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}, CuDeviceVector{Float64, 1}}}, Tuple{Base.Broadcast.Extruded{CuDeviceVector{Tuple{Float64, Float64}, 1}, Tuple{Bool}, Tuple{Int64}}}}, Int64}})
@ CUDA ~\.julia\packages\CUDA\sCev8\src\compiler\execution.jl:291
[10] macro expansion
@ ~\.julia\packages\CUDA\sCev8\src\compiler\execution.jl:102 [inlined]
[11] launch_heuristic(::CUDA.CuArrayBackend, ::GPUArrays.var"#broadcast_kernel#17", ::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{Nothing, Tuple{Base.OneTo{Int64}}, var"#funzione_likelihood#55"{var"#funzione_likelirobin#54"{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, Tuple{Base.Broadcast.Extruded{CuArray{Tuple{Float64, Float64}, 1, CUDA.Mem.DeviceBuffer}, Tuple{Bool}, Tuple{Int64}}}}, ::Int64; elements::Int64, elements_per_thread::Int64)
@ CUDA ~\.julia\packages\CUDA\sCev8\src\gpuarrays.jl:17
[12] copyto!(dest::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, bc::Base.Broadcast.Broadcasted{Nothing, Tuple{Base.OneTo{Int64}}, var"#funzione_likelihood#55"{var"#funzione_likelirobin#54"{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, Tuple{CuArray{Tuple{Float64, Float64}, 1, CUDA.Mem.DeviceBuffer}}})
@ GPUArrays ~\.julia\packages\GPUArrays\gkF6S\src\host\broadcast.jl:65
in expression starting at C:\Users\marce\Documents\GitHub\lab2\Lab2\Esercizio 5\esercizio5.jl:100
Solution 1:
The code you have provided seems to be a bit short of an MWE. However, filling in some random data of the specified types, we have:
using CUDA
a_c, a_p, θ_1 = rand(3)
N = 1000
θ_2p, θ_2c, ϵ_p, σ_p, ϵ_c, σ_c = ntuple(x->CUDA.randn(Float64, N), 6)
function ln_likelihood( a_c::Float64,
a_p::Float64,
θ_1::Float64,
θ_2p::CuArray{Float64},
θ_2c::CuArray{Float64},
ϵ_p::CuArray{Float64},
σ_p::CuArray{Float64},
ϵ_c::CuArray{Float64},
σ_c::CuArray{Float64})
# ...
return a_c + a_p + θ_1 + sum(θ_2p) + sum(θ_2c) + sum(ϵ_p) + sum(σ_p) + sum(ϵ_c) + sum(σ_c)
end
function trova_max_likelihood( θ_1::Float64,
θ_2p::CuArray{Float64},
θ_2c::CuArray{Float64},
ϵ_p::CuArray{Float64},
σ_p::CuArray{Float64},
ϵ_c::CuArray{Float64},
σ_c::CuArray{Float64})
# ...
function funzione_likelirobin(a_c::Float64, a_p::Float64)
global θ_1, θ_2p, θ_2c, ϵ_p, σ_p, ϵ_c, σ_c
ln_likelihood(a_c, a_p, θ_1, θ_2p, θ_2c, ϵ_p, σ_p, ϵ_c, σ_c)
end
funzione_likelihood(x::Tuple{Float64, Float64}) = funzione_likelirobin(x[1],x[2])
# Better make a range if we want to broadcast over it
range = CUDA.fill((1., 2.), 10)
@code_warntype funzione_likelihood.(range)
#Where range::CuArray{Tuple{Float64,Float64}}
end
which runs for me without any error, and gives blue, stably-inferred types in the @code_warntype output:
julia> trova_max_likelihood(θ_1, θ_2p, θ_2c, ϵ_p, σ_p, ϵ_c, σ_c)
MethodInstance for (::var"##dotfunction#413#25"{var"#funzione_likelihood#24"{var"#funzione_likelirobin#23"}})(::CuArray{Tuple{Float64, Float64}, 1, CUDA.Mem.DeviceBuffer})
from (::var"##dotfunction#413#25")(x1) in Main
Arguments
#self#::Core.Const(var"##dotfunction#413#25"{var"#funzione_likelihood#24"{var"#funzione_likelirobin#23"}}(var"#funzione_likelihood#24"{var"#funzione_likelirobin#23"}(var"#funzione_likelirobin#23"())))
x1::CuArray{Tuple{Float64, Float64}, 1, CUDA.Mem.DeviceBuffer}
Body::Union{}
1 ─ %1 = Core.getfield(#self#, :funzione_likelihood)::Core.Const(var"#funzione_likelihood#24"{var"#funzione_likelirobin#23"}(var"#funzione_likelirobin#23"()))
│ %2 = Base.broadcasted(%1, x1)::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Nothing, var"#funzione_likelihood#24"{var"#funzione_likelirobin#23"}, Tuple{CuArray{Tuple{Float64, Float64}, 1, CUDA.Mem.DeviceBuffer}}}
│ Base.materialize(%2)
└── Core.Const(:(return %3))
So it would seem that the instability is likely coming from somewhere in the code you have elided with `...`.
That said, I would heavily recommend avoiding global variables; either just specifying the variables explicitly in the function signature, or if you have to, capturing local variables in a closure, would be preferable to using globals -- which can be a major source of type-instability.
Using a closure instead of globals as follows
using CUDA
a_c, a_p, θ_1 = rand(3)
N = 1000
θ_2p, θ_2c, ϵ_p, σ_p, ϵ_c, σ_c = ntuple(x->CUDA.randn(Float64, N), 6)
function ln_likelihood( a_c::Float64,
a_p::Float64,
θ_1::Float64,
θ_2p::CuArray{Float64},
θ_2c::CuArray{Float64},
ϵ_p::CuArray{Float64},
σ_p::CuArray{Float64},
ϵ_c::CuArray{Float64},
σ_c::CuArray{Float64})
# ...
return a_c + a_p + θ_1 + sum(θ_2p) + sum(θ_2c) + sum(ϵ_p) + sum(σ_p) + sum(ϵ_c) + sum(σ_c)
end
function trova_max_likelihood( θ_1::Float64,
θ_2p::CuArray{Float64},
θ_2c::CuArray{Float64},
ϵ_p::CuArray{Float64},
σ_p::CuArray{Float64},
ϵ_c::CuArray{Float64},
σ_c::CuArray{Float64})
# ...
function funzione_likelirobin(a_c::Float64, a_p::Float64)
ln_likelihood(a_c, a_p, θ_1, θ_2p, θ_2c, ϵ_p, σ_p, ϵ_c, σ_c)
end
funzione_likelihood(x::Tuple{Float64, Float64}) = funzione_likelirobin(x[1],x[2])
# Better make a range if we want to broadcast over it
range = CUDA.fill((1., 2.), 10)
@code_warntype funzione_likelihood.(range)
#Where range::CuArray{Tuple{Float64,Float64}}
end
yields a slightly different @code_warntype output, but still with blue, stably-inferred types and no error:
julia> trova_max_likelihood(θ_1, θ_2p, θ_2c, ϵ_p, σ_p, ϵ_c, σ_c)
MethodInstance for (::var"##dotfunction#414#30"{var"#funzione_likelihood#29"{var"#funzione_likelirobin#28"{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}})(::CuArray{Tuple{Float64, Float64}, 1, CUDA.Mem.DeviceBuffer})
from (::var"##dotfunction#414#30")(x1) in Main
Arguments
#self#::var"##dotfunction#414#30"{var"#funzione_likelihood#29"{var"#funzione_likelirobin#28"{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}}
x1::CuArray{Tuple{Float64, Float64}, 1, CUDA.Mem.DeviceBuffer}
Body::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}
1 ─ %1 = Core.getfield(#self#, :funzione_likelihood)::var"#funzione_likelihood#29"{var"#funzione_likelirobin#28"{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}
│ %2 = Base.broadcasted(%1, x1)::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Nothing, var"#funzione_likelihood#29"{var"#funzione_likelirobin#28"{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, Tuple{CuArray{Tuple{Float64, Float64}, 1, CUDA.Mem.DeviceBuffer}}}
│ %3 = Base.materialize(%2)::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}
└── return %3