function createnet()
    u = Unet()                                 # UNet model (UNet.jl)
    w  = rand(Float32, 256, 256, 1, 1)         # dummy input batch, WHCN layout
    w′ = rand(Float32, 256, 256, 1, 1)         # dummy target batch
    function loss(x, y)
        op = clamp.(u(x), 0.001f0, 1.0f0)      # keep predictions in (0, 1] for the log in BCE
        mean(bce(op, y))                       # bce = binary cross-entropy
    end
    rep = Iterators.repeated((w, w′), 10)      # train on the same batch ten times
    opt = Momentum()                           # Momentum(0.01, 0.9, IdDict{Any,Any}())
    Flux.train!(loss, Flux.params(u), rep, opt)
end
julia> createnet()
ERROR: GPU compilation of kernel #broadcast_kernel#17(CUDA.CuKernelContext, CUDA.CuDeviceArray{Float32, 4, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{4}, NTuple{4, Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Extruded{Array{Float32, 4}, NTuple{4, Bool}, NTuple{4, Int64}}, Base.Broadcast.Extruded{CUDA.CuDeviceArray{Float32, 4, 1}, NTuple{4, Bool}, NTuple{4, Int64}}}}, Int64) failed
KernelError: passing and using non-bitstype argument
Argument 4 to your kernel function is of type Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{4}, NTuple{4, Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Extruded{Array{Float32, 4}, NTuple{4, Bool}, NTuple{4, Int64}}, Base.Broadcast.Extruded{CUDA.CuDeviceArray{Float32, 4, 1}, NTuple{4, Bool}, NTuple{4, Int64}}}}, which is not isbits:
.args is of type Tuple{Base.Broadcast.Extruded{Array{Float32, 4}, NTuple{4, Bool}, NTuple{4, Int64}}, Base.Broadcast.Extruded{CUDA.CuDeviceArray{Float32, 4, 1}, NTuple{4, Bool}, NTuple{4, Int64}}} which is not isbits.
.1 is of type Base.Broadcast.Extruded{Array{Float32, 4}, NTuple{4, Bool}, NTuple{4, Int64}} which is not isbits.
.x is of type Array{Float32, 4} which is not isbits.
Stacktrace:
[1] check_invocation(job::GPUCompiler.CompilerJob)
@ GPUCompiler C:\Users\seatt\.julia\packages\GPUCompiler\jVY4I\src\validation.jl:88
[2] macro expansion
@ C:\Users\seatt\.julia\packages\GPUCompiler\jVY4I\src\driver.jl:417 [inlined]
[3] macro expansion
@ C:\Users\seatt\.julia\packages\TimerOutputs\jgSVI\src\TimerOutput.jl:252 [inlined]
[4] macro expansion
@ C:\Users\seatt\.julia\packages\GPUCompiler\jVY4I\src\driver.jl:416 [inlined]
[5] emit_asm(job::GPUCompiler.CompilerJob, ir::LLVM.Module; strip::Bool, validate::Bool, format::LLVM.API.LLVMCodeGenFileType)
@ GPUCompiler C:\Users\seatt\.julia\packages\GPUCompiler\jVY4I\src\utils.jl:64
[6] cufunction_compile(job::GPUCompiler.CompilerJob, ctx::LLVM.Context)
@ CUDA C:\Users\seatt\.julia\packages\CUDA\DfvRa\src\compiler\execution.jl:354
[7] #224
@ C:\Users\seatt\.julia\packages\CUDA\DfvRa\src\compiler\execution.jl:347 [inlined]
[8] JuliaContext(f::CUDA.var"#224#225"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{GPUArrays.var"#broadcast_kernel#17", Tuple{CUDA.CuKernelContext, CUDA.CuDeviceArray{Float32, 4, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{4}, NTuple{4, Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Extruded{Array{Float32, 4}, NTuple{4, Bool}, NTuple{4, Int64}}, Base.Broadcast.Extruded{CUDA.CuDeviceArray{Float32, 4, 1}, NTuple{4, Bool}, NTuple{4, Int64}}}}, Int64}}}})
@ GPUCompiler C:\Users\seatt\.julia\packages\GPUCompiler\jVY4I\src\driver.jl:76
[9] cufunction_compile(job::GPUCompiler.CompilerJob)
@ CUDA C:\Users\seatt\.julia\packages\CUDA\DfvRa\src\compiler\execution.jl:346
[10] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
@ GPUCompiler C:\Users\seatt\.julia\packages\GPUCompiler\jVY4I\src\cache.jl:90
[11] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CUDA.CuDeviceArray{Float32, 4, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{4}, NTuple{4, Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Extruded{Array{Float32, 4}, NTuple{4, Bool}, NTuple{4, Int64}}, Base.Broadcast.Extruded{CUDA.CuDeviceArray{Float32, 4, 1}, NTuple{4, Bool}, NTuple{4, Int64}}}}, Int64}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ CUDA C:\Users\seatt\.julia\packages\CUDA\DfvRa\src\compiler\execution.jl:299
[12] cufunction
@ C:\Users\seatt\.julia\packages\CUDA\DfvRa\src\compiler\execution.jl:293 [inlined]
[13] macro expansion
@ C:\Users\seatt\.julia\packages\CUDA\DfvRa\src\compiler\execution.jl:102 [inlined]
[14] #launch_heuristic#248
@ C:\Users\seatt\.julia\packages\CUDA\DfvRa\src\gpuarrays.jl:17 [inlined]
[15] _copyto!
@ C:\Users\seatt\.julia\packages\GPUArrays\Hyss4\src\host\broadcast.jl:63 [inlined]
[16] copyto!
@ C:\Users\seatt\.julia\packages\GPUArrays\Hyss4\src\host\broadcast.jl:46 [inlined]
[17] copy
@ C:\Users\seatt\.julia\packages\GPUArrays\Hyss4\src\host\broadcast.jl:37 [inlined]
[18] materialize
@ .\broadcast.jl:860 [inlined]
[19] broadcast(::typeof(+), ::Array{Float32, 4}, ::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer})
@ Base.Broadcast .\broadcast.jl:798
[20] adjoint
@ C:\Users\seatt\.julia\packages\Zygote\D7j8v\src\lib\broadcast.jl:74 [inlined]
[21] _pullback
@ Zygote C:\Users\seatt\.julia\packages\Zygote\D7j8v\src\compiler\interface.jl:352
[34] gradient(f::Function, args::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}})
@ Zygote C:\Users\seatt\.julia\packages\Zygote\D7j8v\src\compiler\interface.jl:75
[35] macro expansion
@ C:\Users\seatt\.julia\packages\Flux\7nTyc\src\optimise\train.jl:109 [inlined]
[36] macro expansion
@ C:\Users\seatt\.julia\packages\Juno\n6wyj\src\progress.jl:134 [inlined]
[37] train!(loss::Function, ps::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}}, data::Base.Iterators.Take{Base.Iterators.Repeated{Tuple{Array{Float32, 4}, Array{Float32, 4}}}}, opt::Flux.Optimise.Momentum; cb::Flux.Optimise.var"#40#46")
@ Flux.Optimise C:\Users\seatt\.julia\packages\Flux\7nTyc\src\optimise\train.jl:107
[38] train!
@ C:\Users\seatt\.julia\packages\Flux\7nTyc\src\optimise\train.jl:105 [inlined]
[39] createnet()
@ VSCodeFeatures c:\Users\seatt\Source\VSCodeFeatures\src\VSCodeFeatures.jl:165
[40] top-level scope
@ REPL[4]:1
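
The decisive frame is [19]: broadcast(::typeof(+), ::Array{Float32, 4}, ::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}). A plain CPU Array is being broadcast against a GPU CuArray, so the resulting kernel closes over a host array, which is not isbits and cannot be passed to GPU code. That is, something in the pipeline lives on the GPU while w and w′ are plain CPU arrays (or vice versa). The error is reproducible without Flux at all; a minimal sketch, assuming a working CUDA.jl setup:

using CUDA
a = rand(Float32, 4, 4)        # host Array
b = CUDA.rand(Float32, 4, 4)   # device CuArray
a .+ b                         # KernelError: passing and using non-bitstype argument

The usual fix is to keep the model and the data on the same device. A sketch under the assumptions that Unet() comes from UNet.jl, that the model is meant to run on the GPU, and that bce stands for Flux.Losses.binarycrossentropy (which averages by default, so the outer mean is dropped):

using Flux, CUDA
using Flux.Losses: binarycrossentropy
using UNet

function createnet()
    u  = gpu(Unet())                           # parameters on the GPU
    w  = gpu(rand(Float32, 256, 256, 1, 1))    # data on the GPU too, so the forward
    w′ = gpu(rand(Float32, 256, 256, 1, 1))    # pass never mixes Array with CuArray
    function loss(x, y)
        op = clamp.(u(x), 0.001f0, 1.0f0)
        binarycrossentropy(op, y)
    end
    rep = Iterators.repeated((w, w′), 10)
    opt = Momentum()
    Flux.train!(loss, Flux.params(u), rep, opt)
end

If everything is meant to stay on the CPU instead, the same rule applies in reverse: leave both the model and the arrays as plain CPU objects. Either way, every array that meets the model's parameters in the forward pass must live on the same device as those parameters.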