From ceccac559b5d40891b66fc0405828fce9e5c8d57 Mon Sep 17 00:00:00 2001 From: Dale Black Date: Thu, 26 Dec 2024 09:25:37 -0800 Subject: [PATCH 1/5] replace (gpu) boolean_indicator with AK.jl version --- src/utils.jl | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 9e086d5..0fe2dbd 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,5 +1,8 @@ using KernelAbstractions using GPUArraysCore: AbstractGPUArray +import AcceleratedKernels as AK + +export boolean_indicator """ ## `boolean_indicator` @@ -46,25 +49,17 @@ function boolean_indicator(f) end function boolean_indicator(f::BitArray) - f_new = similar(f, Float32) - for i in CartesianIndices(f_new) - @inbounds f_new[i] = f[i] ? 0.0f0 : 1.0f10 - end - return f_new -end - -@kernel function boolean_indicator_kernel(f, output) - i = @index(Global) - output[i] = ifelse(f[i] == 0, 1.0f10, 0.0f0) + f_new = similar(f, Float32) + for i in eachindex(f) + @inbounds f_new[i] = f[i] ? 0.0f0 : 1.0f10 + end + return f_new end function boolean_indicator(f::AbstractGPUArray) - backend = get_backend(f) - output = similar(f, Float32) - kernel = boolean_indicator_kernel(backend) - kernel(f, output, ndrange=size(f)) - KernelAbstractions.synchronize(backend) - return output -end - -export boolean_indicator + f_new = similar(f, Float32) + AK.foreachindex(f) do i + @inbounds f_new[i] = f[i] == false ? 1.0f10 : 0.0f0 + end + return f_new +end \ No newline at end of file From 6c3e8929beb84c1fac36970ef66cf00d9c15c0b2 Mon Sep 17 00:00:00 2001 From: Dale Black Date: Thu, 26 Dec 2024 09:25:46 -0800 Subject: [PATCH 2/5] add AK.jl --- Project.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project.toml b/Project.toml index 4259a10..d8ac364 100644 --- a/Project.toml +++ b/Project.toml @@ -4,11 +4,13 @@ authors = ["Dale and contributors"] version = "0.2.2" [deps] +AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" [compat] +AcceleratedKernels = "0.2" GPUArraysCore = "0.1" KernelAbstractions = "0.9" julia = "1.10" From d2f67c7e8c48b50df9723b3f036b30a07bab602b Mon Sep 17 00:00:00 2001 From: Dale Black Date: Thu, 26 Dec 2024 10:46:02 -0800 Subject: [PATCH 3/5] replace with AcceleratedKernels.jl --- src/transform.jl | 447 ++++++++++++++++++---------------------------- src/utils.jl | 1 - test/runtests.jl | 8 + test/transform.jl | 402 ++++++++++++++++++++--------------------- 4 files changed, 383 insertions(+), 475 deletions(-) diff --git a/src/transform.jl b/src/transform.jl index ec3ee8d..8bc32bd 100644 --- a/src/transform.jl +++ b/src/transform.jl @@ -1,5 +1,8 @@ +import KernelAbstractions as KA +import AcceleratedKernels as AK using GPUArraysCore: AbstractGPUVector, AbstractGPUMatrix, AbstractGPUArray -using KernelAbstractions + +export transform!, transform """ ## `transform!` @@ -37,267 +40,153 @@ transform!(f_bool, output, v, z) ``` """ function transform!(f::AbstractVector, output, v, z) - z[1] = -Inf32 - z[2] = Inf32 - - k = 1 - @inbounds for q in 2:length(f) - s = ((f[q] + q^2) - (f[v[k]] + v[k]^2)) / (2 * q - 2 * v[k]) - while s ≤ z[k] - k -= 1 - s = ((f[q] + q^2) - (f[v[k]] + v[k]^2)) / (2 * q - 2 * v[k]) - end - k += 1 - v[k] = q - z[k] = s - z[k+1] = Inf32 - end - - k = 1 - @inbounds for q in 1:length(f) - while z[k+1] < q - k += 1 - end - output[q] = (q - v[k])^2 + f[v[k]] - end + z[1] = -Inf32 + z[2] = Inf32 + + k = 1 + @inbounds for q in 2:length(f) + s = ((f[q] + q^2) - (f[v[k]] + v[k]^2)) / (2 * q - 2 * v[k]) + while s ≤ z[k] + k -= 1 + s = ((f[q] + q^2) - (f[v[k]] + v[k]^2)) / (2 * q - 2 * v[k]) + end + k += 1 + v[k] = q + z[k] = s + z[k+1] = Inf32 + end + + k = 1 + @inbounds for q in 1:length(f) + while z[k+1] < q + k += 1 + end + output[q] = (q - v[k])^2 + f[v[k]] + end end # 2D -function transform!(img::AbstractMatrix, output, v, z; threaded = true) - if threaded - Threads.@threads for i in CartesianIndices(@view(img[:, 1])) - @views transform!(img[i, :], output[i, :], v[i, :], z[i, :]) - end - - copyto!(img, output) - Threads.@threads for j in CartesianIndices(@view(img[1, :])) - @views transform!( - img[:, j], output[:, j], fill!(v[:, j], 1), fill!(z[:, j], 1), - ) - end - else - for i in CartesianIndices(@view(img[:, 1])) - @views transform!(img[i, :], output[i, :], v[i, :], z[i, :]) - end - - copyto!(img, output) - for j in CartesianIndices(@view(img[1, :])) - @views transform!( - img[:, j], output[:, j], fill!(v[:, j], 1), fill!(z[:, j], 1), - ) - end - end -end - -# 3D -function transform!(vol::AbstractArray{<:Real, 3}, output, v, z; threaded = true) - if threaded - Threads.@threads for i in CartesianIndices(@view(vol[:, 1, 1])) - @views transform!(vol[i, :, :], output[i, :, :], v[i, :, :], z[i, :, :]) - end - - copyto!(vol, output) - Threads.@threads for idx in CartesianIndices(@view(vol[1, :, :])) - j, k = Tuple(idx) - @views transform!( - vol[:, j, k], output[:, j, k], fill!(v[:, j, k], 1), fill!(z[:, j, k], 1), - ) - end - else - for i in CartesianIndices(@view(vol[:, 1, 1])) - @views transform!(vol[i, :, :], output[i, :, :], v[i, :, :], z[i, :, :]) - end - - copyto!(vol, output) - for idx in CartesianIndices(@view(vol[1, :, :])) - j, k = Tuple(idx) - @views transform!( - vol[:, j, k], output[:, j, k], fill!(v[:, j, k], 1), fill!(z[:, j, k], 1), - ) - end - end -end - -# GPU (2D) -@kernel function _first_pass_2D!(f, out, s2) - row, col = @index(Global, NTuple) - - if f[row, col] < 0.5f0 - ct = 1 - curr_l = min(col - 1, s2 - col) - finished = false - while !finished && ct <= curr_l - if f[row, col-ct] > 0.5f0 || f[row, col+ct] > 0.5f0 - out[row, col] = ct * ct - finished = true - end - ct += 1 - end - while !finished && ct < col - if f[row, col-ct] > 0.5f0 - out[row, col] = ct * ct - finished = true - end - ct += 1 - end - while !finished && col + ct <= s2 - if f[row, col+ct] > 0.5f0 - out[row, col] = ct * ct - finished = true - end - ct += 1 - end - if !finished - out[row, col] = 1.0f10 - end - else - out[row, col] = 0.0f0 - end -end - -@kernel function _second_pass_2D!(org, out, s1, s2) - row, col = @index(Global, NTuple) - - ct = 1 - curr_l = sqrt(out[row, col]) - while ct < curr_l && row + ct <= s1 - temp = muladd(ct, ct, org[row+ct, col]) - if temp < out[row, col] - out[row, col] = temp - curr_l = sqrt(temp) - end - ct += 1 - end - - ct = 1 - while ct < curr_l && row > ct - temp = muladd(ct, ct, org[row-ct, col]) - if temp < out[row, col] - out[row, col] = temp - curr_l = sqrt(temp) - end - ct += 1 - end +function transform!(img::AbstractMatrix, output, v, z; threaded=true) + if threaded + Threads.@threads for i in eachindex(@view(img[:, 1])) + @views transform!(img[i, :], output[i, :], v[i, :], z[i, :]) + end + + copyto!(img, output) + + Threads.@threads for j in eachindex(@view(img[1, :])) + @views transform!( + img[:, j], output[:, j], fill!(v[:, j], 1), fill!(z[:, j], 1) + ) + end + else + for i in eachindex(@view(img[:, 1])) + @views transform!(img[i, :], output[i, :], v[i, :], z[i, :]) + end + + copyto!(img, output) + + for j in eachindex(@view(img[1, :])) + @views transform!( + img[:, j], output[:, j], fill!(v[:, j], 1), fill!(z[:, j], 1) + ) + end + end end -function transform!(img::AbstractGPUMatrix, output) - s1, s2 = size(img) - backend = get_backend(img) - kernel1! = _first_pass_2D!(backend) - kernel2! = _second_pass_2D!(backend) - - kernel1!(img, output, s2, ndrange = (s1, s2)) - copyto!(img, output) - - kernel2!(img, output, s1, s2, ndrange = (s1, s2)) - KernelAbstractions.synchronize(backend) +function transform!(vol::AbstractArray{<:Real,3}, output, v, z; threaded=true) + if threaded + # X dimension + Threads.@threads for i in axes(vol, 1) + for j in axes(vol, 2) + @views transform!(vol[i, j, :], output[i, j, :], v[i, j, :], z[i, j, :]) + end + end + + copyto!(vol, output) + + # Y dimension + Threads.@threads for i in axes(vol, 1) + for k in axes(vol, 3) + @views transform!(vol[i, :, k], output[i, :, k], fill!(v[i, :, k], 1), fill!(z[i, :, k], 1)) + end + end + + copyto!(vol, output) + + # Z dimension + Threads.@threads for j in axes(vol, 2) + for k in axes(vol, 3) + @views transform!(vol[:, j, k], output[:, j, k], fill!(v[:, j, k], 1), fill!(z[:, j, k], 1)) + end + end + else + # X dimension + for i in axes(vol, 1) + for j in axes(vol, 2) + @views transform!(vol[i, j, :], output[i, j, :], v[i, j, :], z[i, j, :]) + end + end + + copyto!(vol, output) + + # Y dimension + for i in axes(vol, 1) + for k in axes(vol, 3) + @views transform!(vol[i, :, k], output[i, :, k], fill!(v[i, :, k], 1), fill!(z[i, :, k], 1)) + end + end + + copyto!(vol, output) + + # Z dimension + for j in axes(vol, 2) + for k in axes(vol, 3) + @views transform!(vol[:, j, k], output[:, j, k], fill!(v[:, j, k], 1), fill!(z[:, j, k], 1)) + end + end + end end -# GPU (3D) -@kernel function _first_pass_3D!(f, out, s2) - dim1, dim2, dim3 = @index(Global, NTuple) - # 1D along dimension 2 - if f[dim1, dim2, dim3] < 0.5f0 - ct = 1 - curr_l = min(dim2 - 1, s2 - dim2) - finished = false - while !finished && ct <= curr_l - if f[dim1, dim2-ct, dim3] > 0.5f0 || f[dim1, dim2+ct, dim3] > 0.5f0 - out[dim1, dim2, dim3] = ct * ct - finished = true - end - ct += 1 - end - while !finished && ct < dim2 - if f[dim1, dim2-ct, dim3] > 0.5f0 - out[dim1, dim2, dim3] = ct * ct - finished = true - end - ct += 1 - end - while !finished && dim2 + ct <= s2 - if f[dim1, dim2+ct, dim3] > 0.5f0 - out[dim1, dim2, dim3] = ct * ct - finished = true - end - ct += 1 - end - if !finished - out[dim1, dim2, dim3] = 1.0f10 - end - else - out[dim1, dim2, dim3] = 0.0f0 - end -end +function transform!(img::AbstractGPUMatrix, output, v, z) + AK.foreachindex(@view(img[:, 1])) do i + @views transform!(img[i, :], output[i, :], v[i, :], z[i, :]) + end -@kernel function _second_pass_3D!(org, out, s1) - dim1, dim2, dim3 = @index(Global, NTuple) - # 2D along dimension 1 - ct = 1 - curr_l = sqrt(out[dim1, dim2, dim3]) - while ct < curr_l && dim1 + ct <= s1 - temp = muladd(ct, ct, org[dim1+ct, dim2, dim3]) - if temp < out[dim1, dim2, dim3] - out[dim1, dim2, dim3] = temp - curr_l = sqrt(temp) - end - ct += 1 - end - ct = 1 - while ct < curr_l && dim1 - ct > 0 - temp = muladd(ct, ct, org[dim1-ct, dim2, dim3]) - if temp < out[dim1, dim2, dim3] - out[dim1, dim2, dim3] = temp - curr_l = sqrt(temp) - end - ct += 1 - end -end + copyto!(img, output) -@kernel function _third_pass_3D!(org, out, s3) - dim1, dim2, dim3 = @index(Global, NTuple) - # 2D along dimension 3 - ct = 1 - curr_l = sqrt(out[dim1, dim2, dim3]) - while ct < curr_l && dim3 + ct <= s3 - temp = muladd(ct, ct, org[dim1, dim2, dim3+ct]) - if temp < out[dim1, dim2, dim3] - out[dim1, dim2, dim3] = temp - curr_l = sqrt(temp) - end - ct += 1 - end - ct = 1 - while ct < curr_l && ct < dim3 - temp = muladd(ct, ct, org[dim1, dim2, dim3-ct]) - if temp < out[dim1, dim2, dim3] - out[dim1, dim2, dim3] = temp - curr_l = sqrt(temp) - end - ct += 1 - end + AK.foreachindex(@view(img[1, :])) do j + @views transform!(img[:, j], output[:, j], fill!(v[:, j], 1), fill!(z[:, j], 1)) + end end -function transform!(img::AbstractGPUArray, output) - backend = get_backend(img) - s1, s2, s3 = size(img) - kernel1! = _first_pass_3D!(backend) - kernel2! = _second_pass_3D!(backend) - kernel3! = _third_pass_3D!(backend) - - kernel1!(img, output, s2, ndrange = (s1, s2, s3)) - copyto!(img, output) - - kernel2!(img, output, s1, ndrange = (s1, s2, s3)) - copyto!(img, output) - - kernel3!(img, output, s3, ndrange = (s1, s2, s3)) - - KernelAbstractions.synchronize(backend) +function transform!(vol::AbstractGPUArray{T,3}, output, v, z) where {T} + # X dimension + AK.foreachindex(@view(vol[:, 1, 1])) do i + for j in axes(vol, 2) + @views transform!(vol[i, j, :], output[i, j, :], v[i, j, :], z[i, j, :]) + end + end + + copyto!(vol, output) + + # Y dimension + AK.foreachindex(@view(vol[:, 1, 1])) do i + for k in axes(vol, 3) + @views transform!(vol[i, :, k], output[i, :, k], fill!(v[i, :, k], 1), fill!(z[i, :, k], 1)) + end + end + + copyto!(vol, output) + + # Z dimension + AK.foreachindex(@view(vol[1, :, 1])) do j + for k in axes(vol, 3) + @views transform!(vol[:, j, k], output[:, j, k], fill!(v[:, j, k], 1), fill!(z[:, j, k], 1)) + end + end end -export transform! - """ ## `transform` @@ -328,46 +217,54 @@ f_tfm = transform(f_bool) ``` """ function transform(f::AbstractVector) - output = similar(f, eltype(f)) - v = ones(Int32, length(f)) - z = ones(eltype(f), length(f) + 1) + output = similar(f, eltype(f)) + v = ones(Int32, length(f)) + z = ones(eltype(f), length(f) + 1) - transform!(f, output, v, z) - return output + transform!(f, output, v, z) + return output end # 2D -function transform(img::AbstractMatrix; threaded = true) - output = similar(img, eltype(img)) - v = ones(Int32, size(img)) - z = ones(eltype(img), size(img) .+ 1) +function transform(img::AbstractMatrix; threaded=true) + output = similar(img, eltype(img)) + v = ones(Int32, size(img)) + z = ones(eltype(img), size(img) .+ 1) - transform!(img, output, v, z; threaded = threaded) - return output + transform!(img, output, v, z; threaded=threaded) + return output end # 3D -function transform(vol::AbstractArray{<:Real, 3}; threaded = true) - output = similar(vol, eltype(vol)) - v = ones(Int32, size(vol)) - z = ones(eltype(vol), size(vol) .+ 1) +function transform(vol::AbstractArray{<:Real,3}; threaded=true) + output = similar(vol, eltype(vol)) + v = ones(Int32, size(vol)) + z = ones(eltype(vol), size(vol) .+ 1) - transform!(vol, output, v, z; threaded = threaded) - return output + transform!(vol, output, v, z; threaded=threaded) + return output end # GPU (2D) function transform(img::AbstractGPUMatrix) - output = similar(img, Float32) - transform!(img, output) - return output + backend = KA.get_backend(img) + + output = similar(img, Float32) + v = KA.ones(backend, Int32, size(img)) + z = KA.ones(backend, eltype(img), size(img) .+ 1) + + transform!(img, output, v, z) + return output end # GPU (3D) -function transform(img::AbstractGPUArray) - output = similar(img, Float32) - transform!(img, output) - return output -end +function transform(vol::AbstractGPUArray{T,3}) where {T} + backend = KA.get_backend(vol) + + output = similar(vol, Float32) + v = KA.ones(backend, Int32, size(vol)) + z = KA.ones(backend, eltype(vol), size(vol) .+ 1) -export transform + transform!(vol, output, v, z) + return output +end diff --git a/src/utils.jl b/src/utils.jl index 0fe2dbd..f4c00aa 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,4 +1,3 @@ -using KernelAbstractions using GPUArraysCore: AbstractGPUArray import AcceleratedKernels as AK diff --git a/test/runtests.jl b/test/runtests.jl index aab6648..a2f1deb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,14 @@ using Test using KernelAbstractions using Random +#= +To run the tests locally, and still test a GPU backend (e.g. Metal), use the following command: +``` +using Pkg +Pkg.test("DistanceTransforms", test_args=["Metal"]) +``` +=# + AVAILABLE_GPU_BACKENDS = ["CUDA", "AMDGPU", "Metal", "oneAPI"] TEST_BACKENDS = filter(x->x in [AVAILABLE_GPU_BACKENDS; "CPU"], ARGS) diff --git a/test/transform.jl b/test/transform.jl index 7f45af5..027b185 100644 --- a/test/transform.jl +++ b/test/transform.jl @@ -3,206 +3,210 @@ using ImageMorphology: distance_transform, feature_transform test_transform(img) = distance_transform(feature_transform(Bool.(img))) @testset "transform!" begin - @testset "1D transform!" begin - for n in [10, 50, 100] - for test_idx in 1:20 - img = rand([0f0, 1f0], n) - - img_bool = boolean_indicator(img) - output = similar(img, Float32) - v = ones(Int32, size(img)) - z = ones(Float32, size(img) .+ 1) - - transform!(img_bool, output, v, z) - img_test = test_transform(img) .^ 2 - - @test Array(output) ≈ img_test - end - end - end - - @testset "2D transform!" begin - for n in [10, 50, 100] - for test_idx in 1:20 - img = rand([0f0, 1f0], n, n) - - img_bool = boolean_indicator(img) - output = similar(img, Float32) - v = ones(Int32, size(img)) - z = ones(Float32, size(img) .+ 1) - - transform!(img_bool, output, v, z) - img_test = test_transform(img) .^ 2 - - @test Array(output) ≈ img_test - end - - # non-threaded - for test_idx in 1:20 - img = rand([0f0, 1f0], n, n) - - img_bool = boolean_indicator(img) - output = similar(img, Float32) - v = ones(Int32, size(img)) - z = ones(Float32, size(img) .+ 1) - - transform!(img_bool, output, v, z; threaded = false) - img_test = test_transform(img) .^ 2 - - @test Array(output) ≈ img_test - end - end - end - - @testset "3D transform!" begin - for n in [10, 100] - for test_idx in 1:5 - img = rand([0f0, 1f0], n, n, n) - - img_bool = boolean_indicator(img) - output = similar(img, Float32) - v = ones(Int32, size(img)) - z = ones(Float32, size(img) .+ 1) - - transform!(img_bool, output, v, z) - img_test = test_transform(img) .^ 2 - - @test Array(output) ≈ img_test - end - - # non-threaded - for test_idx in 1:5 - img = rand([0f0, 1f0], n, n, n) - - img_bool = boolean_indicator(img) - output = similar(img, Float32) - v = ones(Int32, size(img)) - z = ones(Float32, size(img) .+ 1) - - transform!(img_bool, output, v, z; threaded = false) - img_test = test_transform(img) .^ 2 - - @test Array(output) ≈ img_test - end - end - end - - if dev != Array - @testset "2D GPU transform!" begin - for n in [10, 50, 100] - for test_idx in 1:20 - img = rand([0f0, 1f0], n, n) - - img_gpu = dev(copy(img)) - output = similar(img_gpu) - - transform!(img_gpu, output) - img_test = test_transform(img) .^ 2 - - @test Array(output) ≈ img_test - end - end - end - @testset "3D GPU transform!" begin - for n in [10, 50, 100] - for test_idx in 1:20 - img = rand([0f0, 1f0], n, n, n) - - img_gpu = dev(copy(img)) - output = similar(img_gpu) - - transform!(img_gpu, output) - img_test = test_transform(img) .^ 2 - - @test Array(output) ≈ img_test - end - end - end - else - @info "No GPU available, skipping tests" - end + @testset "1D transform!" begin + for n in [10, 50, 100] + for test_idx in 1:20 + img = rand([0.0f0, 1.0f0], n) + + img_bool = boolean_indicator(img) + output = similar(img, Float32) + v = ones(Int32, size(img)) + z = ones(Float32, size(img) .+ 1) + + transform!(img_bool, output, v, z) + img_test = test_transform(img) .^ 2 + + @test Array(output) ≈ img_test + end + end + end + + @testset "2D transform!" begin + for n in [10, 50, 100] + for test_idx in 1:20 + img = rand([0.0f0, 1.0f0], n, n) + + img_bool = boolean_indicator(img) + output = similar(img, Float32) + v = ones(Int32, size(img)) + z = ones(Float32, size(img) .+ 1) + + transform!(img_bool, output, v, z) + img_test = test_transform(img) .^ 2 + + @test Array(output) ≈ img_test + end + + # non-threaded + for test_idx in 1:20 + img = rand([0.0f0, 1.0f0], n, n) + + img_bool = boolean_indicator(img) + output = similar(img, Float32) + v = ones(Int32, size(img)) + z = ones(Float32, size(img) .+ 1) + + transform!(img_bool, output, v, z; threaded=false) + img_test = test_transform(img) .^ 2 + + @test Array(output) ≈ img_test + end + end + end + + @testset "3D transform!" begin + for n in [10, 100] + for test_idx in 1:5 + img = rand([0.0f0, 1.0f0], n, n, n) + + img_bool = boolean_indicator(img) + output = similar(img, Float32) + v = ones(Int32, size(img)) + z = ones(Float32, size(img) .+ 1) + + transform!(img_bool, output, v, z) + img_test = test_transform(img) .^ 2 + + @test Array(output) ≈ img_test + end + + # non-threaded + for test_idx in 1:5 + img = rand([0.0f0, 1.0f0], n, n, n) + + img_bool = boolean_indicator(img) + output = similar(img, Float32) + v = ones(Int32, size(img)) + z = ones(Float32, size(img) .+ 1) + + transform!(img_bool, output, v, z; threaded=false) + img_test = test_transform(img) .^ 2 + + @test Array(output) ≈ img_test + end + end + end + + if dev != Array + @testset "2D GPU transform!" begin + for n in [10, 50, 100] + for test_idx in 1:20 + img = dev(rand([0.0f0, 1.0f0], n, n)) + + img_bool = boolean_indicator(img) + output = similar(img, Float32) + v = dev(ones(Int32, size(img))) + z = dev(ones(Float32, size(img) .+ 1)) + + transform!(img_bool, output, v, z) + img_test = test_transform(Array(img)) .^ 2 + + @test Array(output) ≈ img_test + end + end + end + @testset "3D GPU transform!" begin + for n in [10, 50, 100] + for test_idx in 1:20 + img = dev(rand([0.0f0, 1.0f0], n, n, n)) + + img_bool = boolean_indicator(img) + output = similar(img, Float32) + v = dev(ones(Int32, size(img))) + z = dev(ones(Float32, size(img) .+ 1)) + + transform!(img_bool, output, v, z) + img_test = test_transform(Array(img)) .^ 2 + + @test Array(output) ≈ img_test + end + end + end + else + @info "No GPU available, skipping tests" + end end @testset "transform" begin - @testset "1D transform" begin - for n in [10, 50, 100] - for test_idx in 1:20 - img = rand([0.0f0, 1.0f0], n) - img_bool = boolean_indicator(img) - output = transform(img_bool) - img_test = test_transform(img) .^ 2 - @test Array(output) ≈ img_test - end - end - end - - @testset "2D transform" begin - for n in [10, 50, 100] - for test_idx in 1:20 - img = rand([0.0f0, 1.0f0], n, n) - img_bool = boolean_indicator(img) - output = transform(img_bool) - img_test = test_transform(img) .^ 2 - @test Array(output) ≈ img_test - end - - # non-threaded - for test_idx in 1:20 - img = rand([0.0f0, 1.0f0], n, n) - img_bool = boolean_indicator(img) - output = transform(img_bool; threaded = false) - img_test = test_transform(img) .^ 2 - @test Array(output) ≈ img_test - end - end - end - - @testset "3D transform" begin - for n in [10, 100] - for test_idx in 1:5 - img = rand([0.0f0, 1.0f0], n, n, n) - img_bool = boolean_indicator(img) - output = transform(img_bool) - img_test = test_transform(img) .^ 2 - @test Array(output) ≈ img_test - end - - # non-threaded - for test_idx in 1:5 - img = rand([0.0f0, 1.0f0], n, n, n) - img_bool = boolean_indicator(img) - output = transform(img_bool; threaded = false) - img_test = test_transform(img) .^ 2 - @test Array(output) ≈ img_test - end - end - end - - if dev != Array - @testset "2D GPU transform" begin - for n in [10, 50, 100] - for test_idx in 1:20 - img = rand([0.0f0, 1.0f0], n, n) - img_gpu = dev(img) - output = transform(img_gpu) - img_test = test_transform(img) .^ 2 - @test Array(output) ≈ img_test - end - end - end - - @testset "3D GPU transform" begin - for n in [10, 50, 100] - for test_idx in 1:20 - img = rand([0.0f0, 1.0f0], n, n, n) - img_gpu = dev(img) - output = transform(img_gpu) - img_test = test_transform(img) .^ 2 - @test Array(output) ≈ img_test - end - end - end - else - @info "No GPU available, skipping tests" - end + @testset "1D transform" begin + for n in [10, 50, 100] + for test_idx in 1:20 + img = rand([0.0f0, 1.0f0], n) + img_bool = boolean_indicator(img) + output = transform(img_bool) + img_test = test_transform(img) .^ 2 + @test Array(output) ≈ img_test + end + end + end + + @testset "2D transform" begin + for n in [10, 50, 100] + for test_idx in 1:20 + img = rand([0.0f0, 1.0f0], n, n) + img_bool = boolean_indicator(img) + output = transform(img_bool) + img_test = test_transform(img) .^ 2 + @test Array(output) ≈ img_test + end + + # non-threaded + for test_idx in 1:20 + img = rand([0.0f0, 1.0f0], n, n) + img_bool = boolean_indicator(img) + output = transform(img_bool; threaded=false) + img_test = test_transform(img) .^ 2 + @test Array(output) ≈ img_test + end + end + end + + @testset "3D transform" begin + for n in [10, 100] + for test_idx in 1:5 + img = rand([0.0f0, 1.0f0], n, n, n) + img_bool = boolean_indicator(img) + output = transform(img_bool) + img_test = test_transform(img) .^ 2 + @test Array(output) ≈ img_test + end + + # non-threaded + for test_idx in 1:5 + img = rand([0.0f0, 1.0f0], n, n, n) + img_bool = boolean_indicator(img) + output = transform(img_bool; threaded=false) + img_test = test_transform(img) .^ 2 + @test Array(output) ≈ img_test + end + end + end + + if dev != Array + @testset "2D GPU transform" begin + for n in [10, 50, 100] + for test_idx in 1:20 + img = dev(rand([0.0f0, 1.0f0], n, n)) + img_bool = boolean_indicator(img) + output = transform(img_bool) + img_test = test_transform(Array(img)) .^ 2 + @test Array(output) ≈ img_test + end + end + end + + @testset "3D GPU transform" begin + for n in [10, 50, 100] + for test_idx in 1:20 + img = dev(rand([0.0f0, 1.0f0], n, n, n)) + img_bool = boolean_indicator(img) + output = transform(img_bool) + img_test = test_transform(Array(img)) .^ 2 + @test Array(output) ≈ img_test + end + end + end + else + @info "No GPU available, skipping tests" + end end From 0944221c03b741134c1789bcc5483fcdca07676d Mon Sep 17 00:00:00 2001 From: Dale Black Date: Thu, 26 Dec 2024 10:49:15 -0800 Subject: [PATCH 4/5] up CI --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index aa6a86b..f9255e4 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -12,8 +12,8 @@ jobs: fail-fast: false matrix: version: - - "1.9" - "1.10" + - "1.11" os: - ubuntu-latest # - macOS-latest (can't use until GitHub upgrades to m-series macs) From c3a6f45777f764e6e35ebf3deb05ef7ff1492c76 Mon Sep 17 00:00:00 2001 From: Dale Black Date: Thu, 26 Dec 2024 10:50:08 -0800 Subject: [PATCH 5/5] up CI cache --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index f9255e4..8ee4f39 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -26,7 +26,7 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: actions/cache@v1 + - uses: actions/cache@v4 env: cache-name: cache-artifacts with: