diff --git a/.github/workflows/RunTests.yml b/.github/workflows/RunTests.yml index 2971ffe..7198d0d 100644 --- a/.github/workflows/RunTests.yml +++ b/.github/workflows/RunTests.yml @@ -7,7 +7,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - julia-version: ["1.9"] + julia-version: ["1.11"] julia-arch: [x86, x64] os: [ubuntu-latest] diff --git a/Project.toml b/Project.toml index b958f53..a53faaf 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["michael catchen "] version = "0.2.1" [deps] +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" HaltonSequences = "13907d55-377f-55d6-a9d6-25ac19e11b95" HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" @@ -12,25 +13,27 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Requires = "ae029012-a4dd-5104-9daa-d747884805df" +SimpleSDMLayers = "2c645270-77db-11e9-22c3-0f302a89c64c" SliceMap = "82cb661a-3f19-5665-9e27-df437c7e54c8" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +Term = "22787eb5-b846-44ae-b979-8e399b8463ab" +TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" [weakdeps] -SpeciesDistributionToolkit = "72b53823-5c0b-4575-ad0e-8e97227ad13b" +NeutralLandscapes = "71847384-8354-4223-ac08-659a5128069f" [extensions] -SDMToolkitExt = ["SpeciesDistributionToolkit"] [compat] Distributions = "0.25" HaltonSequences = "0.2" +HiGHS = "1.5" +JuMP = "1.11" +ProgressMeter = "1.7.2" Requires = "1.3" SliceMap = "0.2.7" SpecialFunctions = "2.1" StatsBase = "0.34" -ProgressMeter = "1.7.2" -JuMP = "1.11" -HiGHS = "1.5" -julia = "1.9" +julia = "1.9" diff --git a/docs/Project.toml b/docs/Project.toml index 71daffa..3512144 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,10 +1,11 @@ [deps] +BiodiversityObservationNetworks = "a5b868d3-191d-4bba-a38a-ad28190da010" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" -DocumenterMarkdown = "997ab1e6-3595-5248-9280-8efb232c3433" DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8" +DocumenterVitepress = "4710194d-e776-4893-9690-8d956a29c365" HaltonSequences = "13907d55-377f-55d6-a9d6-25ac19e11b95" HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" @@ -12,7 +13,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NeutralLandscapes = "71847384-8354-4223-ac08-659a5128069f" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SimpleSDMLayers = "2c645270-77db-11e9-22c3-0f302a89c64c" SliceMap = "82cb661a-3f19-5665-9e27-df437c7e54c8" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" -SpeciesDistributionToolkit = "72b53823-5c0b-4575-ad0e-8e97227ad13b" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git a/docs/make.jl b/docs/make.jl index 89eee49..3190e93 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -2,26 +2,25 @@ push!(LOAD_PATH, "../src/") using Documenter using DocumenterCitations -using DocumenterMarkdown +using DocumenterVitepress using BiodiversityObservationNetworks -bibliography = CitationBibliography(joinpath(@__DIR__, "BONs.bib")) +bib = CitationBibliography(joinpath(@__DIR__, "BONs.bib")) makedocs( - bibliography; 
sitename = "BiodiversityObservationNetwork.jl", authors = "Michael D. Catchen, Timothée Poisot, Kari Norman, Hana Mayall, Tom Malpas", modules = [BiodiversityObservationNetworks], - format = Markdown(), - + format = DocumenterVitepress.MarkdownVitepress( + repo="https://github.com/gottacatchenall/BiodiversityObservationNetworks.jl", + devurl="dev", + ), + warnonly = true, + plugins = [bib] ) deploydocs(; - deps = Deps.pip("mkdocs", "pygments", "python-markdown-math", "mkdocs-material"), repo = "github.com/PoisotLab/BiodiversityObservationNetworks.jl.git", - devbranch = "main", - make = () -> run(`mkdocs build`), - target = "site", push_preview = true, ) diff --git a/docs/src/vignettes/entropize.md b/docs/src/vignettes/entropize.md index 95ad631..6734c88 100644 --- a/docs/src/vignettes/entropize.md +++ b/docs/src/vignettes/entropize.md @@ -29,15 +29,6 @@ pixel scale: ```@example 1 U = entropize(measurements) -heatmap(U) -``` - -The values closest to the median of the distribution have the highest entropy, and the values closest to its extrema have an entropy of 0. The entropy matrix is guaranteed to have values on the unit interval. - -We can use `entropize` as part of a pipeline, and overlay the points optimized based on entropy on the measurement map: - -```@example 1 locations = - measurements |> entropize |> seed(BalancedAcceptance(; numpoints = 100)) |> first -heatmap(U) -``` \ No newline at end of file + seed(BalancedAcceptance(; numsites = 100, uncertainty=U)) +``` diff --git a/docs/src/vignettes/overview.md b/docs/src/vignettes/overview.md index 58e6857..8183204 100644 --- a/docs/src/vignettes/overview.md +++ b/docs/src/vignettes/overview.md @@ -31,22 +31,15 @@ less) uncertainty. To start with, we will extract 200 candidate points, *i.e.* ```@example 1 -pack = seed(BalancedAcceptance(; numpoints = 200), U); +candidates = seed(BalancedAcceptance(; numsites = 200)); ``` -The output of a `BONSampler` (whether at the seeding or refinement step) is -always a tuple, storing in the first position a vector of `CartesianIndex` -elements, and in the second position the matrix given as input. We can have a -look at the first five points: +We can have a look at the first five points: ```@example 1 -first(pack)[1:5] +candidates[1:5] ``` -Although returning the input matrix may seem redundant, it actually allows to -chain samplers together to build pipelines that take a matrix as input, and -return a set of places to sample as outputs; an example is given below. - The positions of locations to sample are given as a vector of `CartesianIndex`, which are coordinates in the uncertainty matrix. Once we have generated a candidate proposal, we can further refine it using a `BONRefiner` -- in this @@ -54,8 +47,7 @@ case, `AdaptiveSpatial`, which performs adaptive spatial sampling (maximizing the distribution of entropy while minimizing spatial auto-correlation). 
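+A rough sketch of that greedy logic, for intuition only (the package's
+implementation appears to start from the most uncertain cell and to score
+candidates with a Matérn covariance; the plain distance proxy below is an
+illustrative stand-in):
+
+```julia
+U = rand(20, 20)                          # toy uncertainty layer
+pool = vec(collect(CartesianIndices(U)))
+chosen = CartesianIndex{2}[]
+push!(chosen, popat!(pool, last(findmax([U[i] for i in pool]))))
+for _ in 2:50
+    best = argmax(eachindex(pool)) do k
+        c = pool[k]
+        # prefer cells that are uncertain and far from already-chosen sites
+        U[c] * minimum(sqrt(sum(abs2, (c - s).I)) for s in chosen)
+    end
+    push!(chosen, popat!(pool, best))
+end
+```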
```@example 1 -candidates, uncertainty = pack -locations, _ = refine(candidates, AdaptiveSpatial(; numpoints = 50), uncertainty) +locations = refine(candidates, AdaptiveSpatial(; numsites = 50, uncertainty=U)) locations[1:5] ``` @@ -72,10 +64,8 @@ functions have a curried version that allows chaining them together using pipes ```@example 1 locations = - U |> - seed(BalancedAcceptance(; numpoints = 200)) |> - refine(AdaptiveSpatial(; numpoints = 50)) |> - first + seed(BalancedAcceptance(; numsites = 200)) |> + refine(AdaptiveSpatial(; numsites = 50, uncertainty=U)) ``` This works because `seed` and `refine` have curried versions that can be used @@ -84,5 +74,6 @@ the original uncertainty matrix: ```@example 1 plt = heatmap(U) -#scatter!(plt, [x[1] for x in locations], [x[2] for x in locations], ms=2.5, mc=:white, label="") -``` \ No newline at end of file +scatter!(plt, [x[1] for x in locations], [x[2] for x in locations]) +current_figure() +``` diff --git a/docs/src/vignettes/uniqueness.md b/docs/src/vignettes/uniqueness.md index 41e335e..756f11f 100644 --- a/docs/src/vignettes/uniqueness.md +++ b/docs/src/vignettes/uniqueness.md @@ -32,8 +32,6 @@ Now we'll use the `stack` function to combine our four environmental layers into layers = BiodiversityObservationNetworks.stack([temp,precip,elevation]); ``` -this requires NeutralLandscapes v0.1.2 - ```@example 1 uncert = rand(MidpointDisplacement(0.8), size(temp), mask=temp); heatmap(uncert) @@ -42,15 +40,15 @@ heatmap(uncert) Now we'll get a set of candidate points from a BalancedAcceptance seeder that has no bias toward higher uncertainty values. ```@example 1 -candpts, uncert = uncert |> seed(BalancedAcceptance(numpoints=100, α=0.0)); +candpts = seed(BalancedAcceptance(numsites=100)); ``` Now we'll `refine` our `100` candidate points down to the 30 most environmentally unique. 
```@example 1 -finalpts, uncert = refine(candpts, Uniqueness(;numpoints=30, layers=layers), uncert) -#= +finalpts = refine(candpts, Uniqueness(;numsites=30, layers=layers)) heatmap(uncert) -scatter!([p[2] for p in candpts], [p[1] for p in candpts], fa=0.0, msc=:white, label="Candidate Points") -scatter!([p[2] for p in finalpts], [p[1] for p in finalpts], c=:dodgerblue, msc=:white, label="Selected Points")=# -``` \ No newline at end of file +scatter!([p[1] for p in candpts], [p[2] for p in candpts], color=:white) +scatter!([p[1] for p in finalpts], [p[2] for p in finalpts], color=:dodgerblue, msc=:white) +current_figure() +``` diff --git a/ext/SDTExt.jl b/ext/SDTExt.jl new file mode 100644 index 0000000..d1ce98f --- /dev/null +++ b/ext/SDTExt.jl @@ -0,0 +1,4 @@ +module SDTExt + + +end \ No newline at end of file diff --git a/src/BiodiversityObservationNetworks.jl b/src/BiodiversityObservationNetworks.jl index fa08dd4..ff633f0 100644 --- a/src/BiodiversityObservationNetworks.jl +++ b/src/BiodiversityObservationNetworks.jl @@ -1,5 +1,6 @@ module BiodiversityObservationNetworks +using SimpleSDMLayers using Distributions using Random using HaltonSequences @@ -10,44 +11,58 @@ using SliceMap using JuMP using HiGHS using LinearAlgebra +using Term +using TestItems include("types.jl") -export BONSeeder, BONRefiner, BONSampler +export BONSampler +export Sites, numsites, pool +export LayerType, DataLayer, InclusionProbability +export Layer, Stack + +include("sample.jl") +export sample + +include("exceptions.jl") +export BONException, TooFewSites, TooManySites include("simplerandom.jl") export SimpleRandom +include("spatialstratified.jl") +export SpatiallyStratified + include("balancedacceptance.jl") export BalancedAcceptance -include("adaptivespatial.jl") -export AdaptiveSpatial +include("weightedbas.jl") +export WeightedBalancedAcceptance + +include("adaptivehotspot.jl") +export AdaptiveHotspot include("cubesampling.jl") export CubeSampling +include("fractaltriad.jl") +export FractalTriad + +include("grts.jl") +export GeneralizedRandomTessellatedStratified + include("uniqueness.jl") export Uniqueness -include("seed.jl") -export seed, seed! +#include("seed.jl") +#export seed, seed! -include("refine.jl") -export refine, refine! +#include("refine.jl") +#export refine, refine! include("entropize.jl") export entropize, entropize! -include("optimize.jl") -export optimize - include("utils.jl") -export stack, squish, _squish, _norm - -#=using Requires -function __init__() - @require SpeciesDistributionToolkit="72b53823-5c0b-4575-ad0e-8e97227ad13b" include(joinpath("integrations", "simplesdms.jl")) - @require Zygote="e88e6eb3-aa80-5325-afca-941959d7151f" include(joinpath("integrations", "zygote.jl")) -end=# +export stack end diff --git a/src/adaptivespatial.jl b/src/adaptivehotspot.jl similarity index 52% rename from src/adaptivespatial.jl rename to src/adaptivehotspot.jl index 7cce7c6..c3c9afb 100644 --- a/src/adaptivespatial.jl +++ b/src/adaptivehotspot.jl @@ -1,42 +1,32 @@ """ - AdaptiveSpatial + AdaptiveHotspot -... - -**numpoints**, an Integer (def. 50), specifying the number of points to use. - -**α**, an AbstractFloat (def. 1.0), specifying ... 
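+Sites are selected greedily: the first is the cell of maximum uncertainty, and
+each subsequent site maximizes entropy while minimizing spatial
+auto-correlation with the sites already chosen.
+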
+- **numsites**: an integer specifying the number of points to use (default is 30) +- **pool**: the sites that could potentially be picked +- **uncertainty**: a `Layer` specifying the current uncertainty at each site """ -Base.@kwdef mutable struct AdaptiveSpatial{T <: Integer} <: BONRefiner - numpoints::T = 50 - function AdaptiveSpatial(numpoints) - if numpoints < one(numpoints) - throw( - ArgumentError( - "You cannot have an AdaptiveSpatial with fewer than one point", - ), - ) - end - return new{typeof(numpoints)}(numpoints) +Base.@kwdef mutable struct AdaptiveHotspot{T <: Integer} <: BONSampler + numsites::T = 30 + function AdaptiveHotspot(numsites) + as = new{typeof(numsites)}(numsites) + check_arguments(as) + return as end end -_generate!( - coords::Vector{CartesianIndex}, - pool::Vector{CartesianIndex}, - sampler::AdaptiveSpatial, - uncertainty) = throw(ArgumentError( - "You can only call AdaptiveSpatial with a single layer of type Matrix")) +function check_arguments(as::AdaptiveHotspot) + check(TooFewSites, as) + return nothing +end function _generate!( coords::Vector{CartesianIndex}, pool::Vector{CartesianIndex}, - sampler::AdaptiveSpatial, - uncertainty::Array{T,2}, -) where {T <: AbstractFloat} - + sampler::AdaptiveHotspot, + uncertainty::L +) where L<:Layer # Distance matrix (inlined) - d = zeros(Float64, Int((sampler.numpoints * (sampler.numpoints - 1)) / 2)) + d = zeros(Float64, Int((sampler.numsites * (sampler.numsites - 1)) / 2)) # Start with the point with maximum entropy imax = last(findmax([uncertainty[i] for i in pool])) @@ -46,7 +36,7 @@ function _generate!( best_score = 0.0 best_s = 1 - for i in 2:(sampler.numpoints) + for i in 2:(sampler.numsites) for (ci, cs) in enumerate(pool) coords[i] = cs # Distance update @@ -65,7 +55,7 @@ function _generate!( end coords[i] = popat!(pool, best_s) end - return (coords, uncertainty) + return coords end function _matérn(d, ρ, ν) @@ -86,3 +76,23 @@ function _D(a1::T, a2::T) where {T <: CartesianIndex{2}} x2, y2 = a2.I return sqrt((x1 - x2)^2.0 + (y1 - y2)^2.0) end + +# ==================================================== +# +# Tests +# +# ===================================================== + +@testitem "AdaptiveHotspot default constructor works" begin + @test typeof(AdaptiveHotspot()) <: AdaptiveHotspot +end + +@testitem "AdaptiveHotspot has right subtypes" begin + @test AdaptiveHotspot <: BONSampler +end + +@testitem "AdaptiveHotspot requires positive number of sites" begin + @test_throws TooFewSites AdaptiveHotspot(numsites = 1) + @test_throws TooFewSites AdaptiveHotspot(numsites = 0) + @test_throws TooFewSites AdaptiveHotspot(numsites = -1) +end diff --git a/src/balancedacceptance.jl b/src/balancedacceptance.jl index 8c95df0..42386f6 100644 --- a/src/balancedacceptance.jl +++ b/src/balancedacceptance.jl @@ -4,78 +4,92 @@ A `BONSampler` that uses Balanced-Acceptance Sampling (van Dam-Bates et al.
2018 https://doi.org/10.1111/2041-210X.13003) """ -Base.@kwdef mutable struct BalancedAcceptance{I <: Integer, F <: AbstractFloat} <: BONSeeder - numpoints::I = 50 - α::F = 1.0 - function BalancedAcceptance(numpoints, α) - if numpoints < one(numpoints) - throw( - ArgumentError( - "You cannot have a BalancedAcceptance with fewer than one point", - ), - ) - end - if α < zero(α) - throw( - ArgumentError( - "The value of α for BalancedAcceptance must be greater or equal to 0", - ), - ) - end - return new{typeof(numpoints), typeof(α)}(numpoints, α) +Base.@kwdef struct BalancedAcceptance{I<:Integer} <: BONSampler + numsites::I = 30 + dims::Tuple{I,I} = (50,50) + function BalancedAcceptance(numsites::I, dims::Tuple{J,J}) where {I<:Integer, J<:Integer} + bas = new{I}(numsites, dims) + check_arguments(bas) + return bas end end -# TODO -# - make this not spaghetti code -# - this should accept a boolean array mask arg which should -# get treated like NaNs -function _generate!( - coords::Vector{CartesianIndex}, - sampler::BalancedAcceptance, - uncertainty::Matrix{T}, -) where {T <: AbstractFloat} +_default_pool(bas::BalancedAcceptance) = pool(bas.dims) +BalancedAcceptance(M::Matrix{T}; numsites = 30) where T = BalancedAcceptance(numsites, size(M)) +BalancedAcceptance(l::Layer; numsites = 30) = BalancedAcceptance(numsites, size(l)) + +maxsites(bas::BalancedAcceptance) = prod(bas.dims) + +function check_arguments(bas::BalancedAcceptance) + check(TooFewSites, bas) + check(TooManySites, bas) + return nothing +end + +function _sample!( + selected::S, + candidates::C, + ba::BalancedAcceptance +) where {S<:Sites,C<:Sites} seed = rand(Int32.(1e0:1e7), 2) - np, α = sampler.numpoints, sampler.α - x, y = size(uncertainty) - - - nonnan_indices = findall(!isnan, uncertainty) - stduncert = similar(uncertainty) - - uncert_values = uncertainty[nonnan_indices] - stduncert_values = similar(uncert_values) - zfit = nothing - if var(uncert_values) > 0 - zfit = StatsBase.fit(ZScoreTransform, uncert_values) - stduncert_values = StatsBase.transform(zfit, uncert_values) - end - - nonnan_counter = 1 - for i in eachindex(uncertainty) - if isnan(uncertainty[i]) - stduncert[i] = NaN - elseif !isnothing(zfit) - stduncert[i] = stduncert_values[nonnan_counter] - nonnan_counter += 1 - else - stduncert[i] = 1. - end - end + n = numsites(ba) + x,y = ba.dims + + candidate_mask = zeros(Bool, x, y) + candidate_mask[candidates.coordinates] .= 1 + + # Points are added sequentially; each proposal is checked against the + # candidate mask and skipped if it falls on a masked-out cell + exp_needed = 10 * Int(ceil((length(candidates)/(x*y)) .* n)) - reluncert = broadcast(x -> isnan(x) ? NaN : exp(α * x) / (1 + exp(α * x)), stduncert) - ptct = 1 - addedpts = 1 - while addedpts <= length(coords) + ct = 1 + for ptct in 1:exp_needed i, j = haltonvalue(seed[1] + ptct, 2), haltonvalue(seed[2] + ptct, 3) - candcoord = CartesianIndex(convert.(Int32, [ceil(x * i), ceil(y * j)])...) - prob = reluncert[candcoord] - if !isnan(prob) && rand() < prob - coords[addedpts] = candcoord - addedpts += 1 - end - ptct += 1 + proposal = CartesianIndex(convert.(Int32, [ceil(x * i), ceil(y * j)])...)
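+        # `haltonvalue(k, b)` (a package helper) is assumed to return the k-th
+        # element of the base-b Halton sequence in [0, 1); bases 2 and 3 give a
+        # low-discrepancy pair that `ceil` maps onto a raster cell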
+ if ct > n + break + end + if candidate_mask[proposal] + selected[ct] = proposal + ct += 1 + end end + return selected +end + +# ==================================================== +# +# Tests +# +# ===================================================== + +@testitem "BalancedAcceptance default constructor works" begin + @test typeof(BalancedAcceptance()) <: BalancedAcceptance +end + +@testitem "BalancedAcceptance requires positive number of sites" begin + @test_throws TooFewSites BalancedAcceptance(numsites = 1) + @test_throws TooFewSites BalancedAcceptance(numsites = 0) + @test_throws TooFewSites BalancedAcceptance(numsites = -1) +end + +@testitem "BalancedAcceptance can't be run with too many sites" begin + numpts, numcandidates = 26, 25 + @test numpts > numcandidates # who watches the watchmen? + dims = Int32.(floor.((sqrt(numcandidates), sqrt(numcandidates)))) + @test_throws TooManySites BalancedAcceptance(numpts, dims) +end + +@testitem "BalancedAcceptance can generate points" begin + bas = BalancedAcceptance() + coords = sample(bas) + + @test typeof(coords) <: Sites +end + - return (coords, uncertainty) +@testitem "BalancedAcceptance can take number of points as keyword argument" begin + N = 40 + bas = BalancedAcceptance(; numsites = N) + @test bas.numsites == N end diff --git a/src/cubesampling.jl b/src/cubesampling.jl index af17a4e..91a591a 100644 --- a/src/cubesampling.jl +++ b/src/cubesampling.jl @@ -5,85 +5,105 @@ A `BONSampler` that uses Cube Sampling (Tillé 2011) ... -**numpoints**, an Integer (def. 50), specifying the number of points to use. +**numsites**, an Integer (def. 50), specifying the number of points to use. **fast**, a Boolean (def. true) indicating whether to use the fast flight algorithm. **x**, a Matrix of auxiliary variables for the candidate points, with one row for each variable and one column for each candidate point. -**πₖ**, a Float Vector indicating the probabilities of inclusion for each candidate point; should sum to numpoints value. +**πₖ**, a Float Vector indicating the probabilities of inclusion for each candidate point; should sum to the `numsites` value.
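+
+A hedged usage sketch (equal inclusion probabilities that sum to `numsites`;
+field names as in the struct below):
+
+    x = rand(0:4, 3, 50)                  # 3 auxiliary variables, 50 candidates
+    cs = CubeSampling(; numsites = 10, x = x, πₖ = fill(10 / 50, 50))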
""" -Base.@kwdef mutable struct CubeSampling{I <: Integer, M <: Matrix, V <: Vector} <: BONRefiner - numpoints::I = 50 +Base.@kwdef struct CubeSampling{I <: Integer, M <: Matrix, V <: Vector} <: BONSampler + numsites::I = 50 fast::Bool = true x::M = rand(0:4, 3, 50) - πₖ::V = zeros(size(x,2)) + πₖ::V = zeros(size(x, 2)) + function CubeSampling(numsites, fast, x, πₖ) + cs = new{typeof(numsites), typeof(x), typeof(πₖ)}(numsites, fast, x, πₖ) + _check_arguments(cs) + return cs + end +end - function CubeSampling(numpoints, fast, x, πₖ) +numsites(cubesampling::CubeSampling) = cubesampling.numsites +maxsites(cubesampling::CubeSampling) = size(cubesampling.x, 2) - if numpoints < one(numpoints) - throw(ArgumentError("You cannot have a CubeSampling with fewer than one point.",),) - end - if numpoints > length(πₖ) - throw(ArgumentError("You cannot select more points than the number of candidate points.",),) - end - if length(πₖ) != size(x, 2) - throw(DimensionMismatch("The number of inclusion probabilites does not match the dimensions of the auxillary variable matrix.",),) - end - return new{typeof(numpoints), typeof(x), typeof(πₖ)}(numpoints, fast, x, πₖ) +function check_arguments(cubesampling::CubeSampling) + check(TooFewSites, cubesampling) + check(TooManySites, cubesampling) + + if numsites > length(cubesampling.πₖ) + throw( + ArgumentError( + "You cannot select more points than the number of candidate points.", + ), + ) + end + if length(cubesampling.πₖ) != size(cubesampling.x, 2) + throw( + DimensionMismatch( + "The number of inclusion probabilites does not match the dimensions of the auxillary variable matrix.", + ), + ) end + return end function check_conditions(coords, pool, sampler) πₖ = sampler.πₖ if sum(sampler.πₖ) == 0 @info "Probabilities of inclusion were not provided, so we assume equal probability design." - πₖ = fill(sampler.numpoints/length(pool), length(pool)) - end - if round(Int, sum(πₖ)) != sampler.numpoints - @warn "The inclusion probabilities sum to $(round(Int, sum(πₖ))), which will be your sample size instead of numpoints." + πₖ = [sampler.numsites / length(pool) for _ in eachindex(pool)] + end + if round(Int, sum(πₖ)) != sampler.numsites + @warn "The inclusion probabilities sum to $(round(Int, sum(πₖ))), which will be your sample size instead of numsites." 
end if length(pool) != length(πₖ) - throw(DimensionMismatch("The πₖ vector does not match the number of candidate points.",),) + throw( + DimensionMismatch( + "The πₖ vector does not match the number of candidate points.", + ), + ) end if length(πₖ) != size(sampler.x, 2) - throw(DimensionMismatch("There is a mismatch in the number of inclusion probabilities and the points in the auxillary matrix.",),) + throw( + DimensionMismatch( + "There is a mismatch in the number of inclusion probabilities and the points in the auxiliary matrix.", + ), + ) end - πₖ - + return πₖ end - function _generate!( - coords::Vector{CartesianIndex}, + coords::Vector{CartesianIndex}, pool::Vector{CartesianIndex}, sampler::CubeSampling, - uncertainty::Matrix{T} - ) where {T <: AbstractFloat} - + uncertainty::Matrix{T}, +) where {T <: AbstractFloat} πₖ = check_conditions(coords, pool, sampler) # sort points by distance in auxiliary variable space dist = mahalanobis(πₖ, sampler.x) - perm = sortperm(dist, rev=true) - pool, πₖ = pool[perm], πₖ[perm] + perm = sortperm(dist; rev = true) + pool, πₖ = pool[perm], πₖ[perm] + + x = sampler.x[:, perm] - x = sampler.x[:,perm] - # To keep the sample size enforced, add πₖ as an aux variable x = vcat(transpose(πₖ), x) # pick flight phase algorithm π_optimal_flight = sampler.fast ? cubefastflight(πₖ, x) : cubeflight(πₖ, x) # check if there are non-integer probabilities - non_int_ind = findall(u -> u .∉ Ref(Set([0,1])), π_optimal_flight) + non_int_ind = findall(u -> u .∉ Ref(Set([0, 1])), π_optimal_flight) # if so, perform landing phase to resolve them π_optimal = isempty(non_int_ind) ? π_optimal_flight : cubeland(π_optimal_flight, πₖ, x) - selected = pool[findall(z -> z == 1, π_optimal)] + selected = pool[findall(isequal(1), π_optimal)] - for i = 1:length(selected) + for i in eachindex(selected) - coords[i] = pool[i] + coords[i] = selected[i] end @@ -91,16 +111,15 @@ function _generate!( function cubeflight(πₖ, x) - N = length(πₖ) rowdim = size(x)[1] j = 0 - set_nullspace = zeros(1,2) + set_nullspace = zeros(1, 2) π_optimal = πₖ # check if there is a possible u to satisfy the conditions while size(set_nullspace)[2] != 0 - j = j+1 + j = j + 1 ## STEP 1 ## # make A matrix # A is the matrix of auxiliary variables divided by the inclusion probability # for the population unit A = similar(x, Float64) - for i = 1:N - if π_optimal[i] .∈ Ref(Set([0,1])) - A[:,i] = zeros(rowdim) - else - A[:,i] = x[:,i] ./ π_optimal[i] + for i in 1:N + if π_optimal[i] .∈ Ref(Set([0, 1])) + A[:, i] = zeros(rowdim) + else + A[:, i] = x[:, i] ./ π_optimal[i] end end @@ -123,13 +142,13 @@ function cubeflight(πₖ, x) # let's make sure the rows that need it satisfy that condition # get index where π_optimal is 0 or 1 - π_integer = findall(u -> u .∈ Ref(Set([0,1])), π_optimal) + π_integer = findall(u -> u .∈ Ref(Set([0, 1])), π_optimal) # if none of the π_optimal's are fixed yet (as 0 or 1) u can be a vector from the nullspace if length(π_integer) == 0 u = kernel[:, rand(1:size(kernel)[2])] - - # if only one is fixed, can also pick a u vector but it shouldn't be the trivial indicator vector + + # if only one is fixed, can also pick a u vector but it shouldn't be the trivial indicator vector elseif length(π_integer) == 1 sums = sum(eachrow(kernel)) # find indicator vector ind = findall(x -> x == 1, sums) # get rid of those columns ind = deleteat!(collect(1:size(kernel)[2]), ind) u = kernel[:, rand(ind)] - # otherwise, need to make
sure u_k = 0 condition is satisfied for fixed pikstar's else # get rows of A's nullspace corresponding to those pikstar's set_A = kernel[π_integer, :] # get the nullspace of that matrix - + set_nullspace = nullspace(set_A) if size(set_nullspace)[2] == 0 @@ -175,38 +194,36 @@ function cubeflight(πₖ, x) # λ₁ = -πₖ⁺/u # λ₂ = (πₖ⁺ - 1)/u - λ₁_max(; u, πₖ) = @. ifelse(u > 0, (1 - πₖ) / u, - πₖ / u) + λ₁_max(; u, πₖ) = @. ifelse(u > 0, (1 - πₖ) / u, -πₖ / u) λ₂_max(; u, πₖ) = @. ifelse(u > 0, πₖ / u, (πₖ - 1) / u) - λ₁ = minimum(filter(x -> isfinite(x), λ₁_max(u = u, πₖ = πₖ))) - λ₂ = minimum(filter(x -> isfinite(x), λ₂_max(u = u, πₖ = πₖ))) + λ₁ = minimum(filter(isfinite, λ₁_max(; u = u, πₖ = πₖ))) + λ₂ = minimum(filter(isfinite, λ₂_max(; u = u, πₖ = πₖ))) ## STEP 3 ## # calculate the inequality expression for both lambdas - λ1_ineq = @. πₖ + ( λ₁ * u) - λ2_ineq = @. πₖ - ( λ₂ * u) + λ1_ineq = @. πₖ + (λ₁ * u) + λ2_ineq = @. πₖ - (λ₂ * u) ineq_mat = reduce(hcat, [λ1_ineq, λ2_ineq]) # the new inclusion probability π is one of the lambda expressions with a given probability q1, q2 - q₁ = λ₂/(λ₁ + λ₂) + q₁ = λ₂ / (λ₁ + λ₂) q₂ = 1 - q₁ - π_optimal = map(r -> sample(r, Weights([q₁,q₂])), eachrow(ineq_mat)) - end - return(π_optimal) + π_optimal = map(r -> sample(r, Weights([q₁, q₂])), eachrow(ineq_mat)) + end + return (π_optimal) end function cubefastflight(πₖ, x) - ## Initialization ## - # number of auxillary variables num_aux = size(x)[1] # get all non-integer probabilities non_int_ind = findall(u -> u .∉ Ref(Set([0, 1])), πₖ) - + π_nonint = πₖ[non_int_ind] Ψ = π_nonint[1:(num_aux + 1)] @@ -217,11 +234,10 @@ function cubefastflight(πₖ, x) k = num_aux + 2 - while k <= length(π_nonint) - Ψ = update_psi(Ψ, B) + Ψ = update_psi(Ψ, B) - if length(findall(z -> z .<0, Ψ)) > 0 + if length(findall(z -> z .< 0, Ψ)) > 0 throw(error("Negative inclusion probability")) end @@ -245,45 +261,45 @@ function cubefastflight(πₖ, x) # now do the final iteration Ψ = update_psi(Ψ, B) π_nonint[r] = Ψ - return(π_nonint) + return (π_nonint) end function update_psi(Ψ, B) - # get vector u in the kernel of B - u = nullspace(B)[:, 1] + # get vector u in the kernel of B + u = nullspace(B)[:, 1] - # want max λ₁, λ₂ such that 0 <= Ψ + λ₁*u <= 1 and 0 <= Ψ - λ₂*u <= 1 - # solve the inequalities for λ and you get max values for u > 0 and u < 0 - # for λ₁ : for u > 0, λ₁ = (1-Ψ)/u; for u < 0, λ₁ = -Ψ/u - # for λ₂ : for u > 0, λ₂ = Ψ/u; for u < 0, λ₂ = (Ψ - 1)/u + # want max λ₁, λ₂ such that 0 <= Ψ + λ₁*u <= 1 and 0 <= Ψ - λ₂*u <= 1 + # solve the inequalities for λ and you get max values for u > 0 and u < 0 + # for λ₁ : for u > 0, λ₁ = (1-Ψ)/u; for u < 0, λ₁ = -Ψ/u + # for λ₂ : for u > 0, λ₂ = Ψ/u; for u < 0, λ₂ = (Ψ - 1)/u - λ₁_max(; u, πₖ) = @. ifelse(u > 0, (1 - πₖ) / u, -πₖ / u) - λ₂_max(; u, πₖ) = @. ifelse(u > 0, πₖ / u, (πₖ - 1) / u) + λ₁_max(; u, πₖ) = @. ifelse(u > 0, (1 - πₖ) / u, -πₖ / u) + λ₂_max(; u, πₖ) = @. ifelse(u > 0, πₖ / u, (πₖ - 1) / u) - λ₁_vec = filter(x -> isfinite(x), λ₁_max(; u = u, πₖ = Ψ)) - λ₂_vec = filter(x -> isfinite(x), λ₂_max(; u = u, πₖ = Ψ)) + λ₁_vec = filter(x -> isfinite(x), λ₁_max(; u = u, πₖ = Ψ)) + λ₂_vec = filter(x -> isfinite(x), λ₂_max(; u = u, πₖ = Ψ)) - deleteat!(λ₁_vec, λ₁_vec .<= 0) - deleteat!(λ₂_vec, λ₂_vec .<= 0) + deleteat!(λ₁_vec, λ₁_vec .<= 0) + deleteat!(λ₂_vec, λ₂_vec .<= 0) - λ₁ = minimum(λ₁_vec) - λ₂ = minimum(λ₂_vec) + λ₁ = minimum(λ₁_vec) + λ₂ = minimum(λ₂_vec) - # calculate the inequality expression for both lambdas - λ₁_ineq = @. Ψ + (λ₁ * u) - λ₂_ineq = @. 
Ψ - (λ₂ * u) + # calculate the inequality expression for both lambdas + λ₁_ineq = @. Ψ + (λ₁ * u) + λ₂_ineq = @. Ψ - (λ₂ * u) - # checking for floating point issues - tol = 1e-13 - λ₁_ineq[abs.(λ₁_ineq) .< tol] .= 0 - λ₂_ineq[abs.(λ₂_ineq) .< tol] .= 0 + # checking for floating point issues + tol = 1e-13 + λ₁_ineq[abs.(λ₁_ineq) .< tol] .= 0 + λ₂_ineq[abs.(λ₂_ineq) .< tol] .= 0 - # the new inclusion probability π is one of the lambda expressions with a given probability q1, q2 - q₁ = λ₂ / (λ₁ + λ₂) + # the new inclusion probability π is one of the lambda expressions with a given probability q1, q2 + q₁ = λ₂ / (λ₁ + λ₂) - new_πₖ = rand() < q₁ ? λ₁_ineq : λ₂_ineq - return(new_πₖ) -end + new_πₖ = rand() < q₁ ? λ₁_ineq : λ₂_ineq + return (new_πₖ) +end function cubeland(pikstar, πₖ, x) ### Landing Phase ### @@ -292,7 +308,7 @@ function cubeland(pikstar, πₖ, x) pikstar_return = copy(pikstar) # get all non-integer probabilities - non_int_ind = findall(u -> u .∉ Ref(Set([0,1])), pikstar_return) + non_int_ind = findall(u -> u .∉ Ref(Set([0, 1])), pikstar_return) non_int_piks = pikstar_return[non_int_ind] N_land = length(non_int_piks) @@ -309,7 +325,7 @@ function cubeland(pikstar, πₖ, x) # then get matrix of potential sample design # get vector with appropriate allocation of 0's and 1's - base_vec = vcat(repeat([1.0], outer = n_land), repeat([0.0], outer = (N_land - n_land))) + base_vec = vcat(repeat([1.0]; outer = n_land), repeat([0.0]; outer = (N_land - n_land))) samps = reduce(vcat, transpose.(unique_permutations(base_vec))) @@ -322,18 +338,18 @@ function cubeland(pikstar, πₖ, x) # let's get A for the non-integer units A_land = x_land ./ reshape(πₖ[non_int_ind], :, N_land) - sample_pt = A_land * transpose(sub_mat) ## FIXME: need to deal with the case that there are fixed zeros in πₖ #A = x ./ reshape(πₖ, :, N) - zero_pik_ind = findall(u -> u == 0, πₖ) - A = x[:, setdiff(1:end, zero_pik_ind)] ./ reshape(πₖ[setdiff(1:end, zero_pik_ind)], :, length(πₖ) - length(zero_pik_ind)) - + zero_pik_ind = findall(isequal(0), πₖ) + A = + x[:, setdiff(1:end, zero_pik_ind)] ./ + reshape(πₖ[setdiff(1:end, zero_pik_ind)], :, length(πₖ) - length(zero_pik_ind)) - cost = zeros(size(samps,1)) + cost = zeros(size(samps, 1)) for i in 1:size(samps)[1] - cost[i] = transpose(sample_pt[:, i]) * inv(A*transpose(A)) * sample_pt[:, i] + cost[i] = transpose(sample_pt[:, i]) * inv(A * transpose(A)) * sample_pt[:, i] end # get matrix of samples and costs @@ -343,22 +359,26 @@ function cubeland(pikstar, πₖ, x) ## linear programing ## model = Model(HiGHS.Optimizer) - @variable(model, ps[1:size(samps,1)] >= 0) + @variable(model, ps[1:size(samps, 1)] >= 0) # multiple cost (lp_mat[2]) by ps[id], where id is lp_mat[1] - @objective(model, Min, sum(sample[2] * ps[trunc(Int, sample[1])] for sample in eachrow(lp_mat))) + @objective( + model, + Min, + sum(sample[2] * ps[trunc(Int, sample[1])] for sample in eachrow(lp_mat)) + ) @constraint(model, sum(ps[id]) == 1) - for i in 1:size(samps,2) - @constraint(model, sum(ps .* (samps.>0)[:,i]) == non_int_piks[i]) + for i in 1:size(samps, 2) + @constraint(model, sum(ps .* (samps .> 0)[:, i]) == non_int_piks[i]) end optimize!(model) - if has_values(model) + if has_values(model) samp_prob = value.(ps) - + # pick a sample based on their probabilities samp_ind = sample(1:length(samp_prob), Weights(samp_prob)) @@ -366,23 +386,26 @@ function cubeland(pikstar, πₖ, x) pikstar_return[non_int_ind] = samps[samp_ind, :] else @warn "The linear program did not find a feasible solution." 
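+            # fall back to drawing one of the candidate 0/1 designs (the rows
+            # of `samps`) uniformly at random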
- pikstar_return[non_int_ind] = samps[sample(1:size(samps,1)), :] - end + pikstar_return[non_int_ind] = samps[sample(1:size(samps, 1)), :] + end - return(pikstar_return) + return pikstar_return end # all credit to stackoverflow https://stackoverflow.com/questions/65051953/julia-generate-all-non-repeating-permutations-in-set-with-duplicates -function unique_permutations(x::T, prefix=T()) where T +function unique_permutations(x::T, prefix = T()) where {T} if length(x) == 1 return [[prefix; x]] else t = T[] for i in eachindex(x) - if i > firstindex(x) && x[i] == x[i-1] + if i > firstindex(x) && x[i] == x[i - 1] continue end - append!(t, unique_permutations([x[begin:i-1];x[i+1:end]], [prefix; x[i]])) + append!( + t, + unique_permutations([x[begin:(i - 1)]; x[(i + 1):end]], [prefix; x[i]]), + ) end return t end @@ -398,21 +421,33 @@ function mahalanobis(πₖ, x) p = size(x, 1) x̂ = x ./ reshape(πₖ, :, N) - mean_x̂ = (1/N) .* sum(x̂, dims = 2) + mean_x̂ = (1 / N) .* sum(x̂; dims = 2) k_vecs = x̂ .- mean_x̂ outer_prods = Array{Float64}(undef, p, p, N) for i in 1:N outer_prods[:, :, i] = k_vecs[1:p, i] * transpose(k_vecs[1:p, i]) end - - sigma = (1/(N-1)) * dropdims(sum(outer_prods, dims = 3), dims = 3) + + sigma = (1 / (N - 1)) * dropdims(sum(outer_prods; dims = 3); dims = 3) inv_sigma = inv(sigma) - + d = Vector{Float64}(undef, N) for i in 1:N d[i] = (transpose(x̂[1:p, i] - mean_x̂) * inv_sigma * (x̂[1:p, i] - mean_x̂))[1] end return d -end \ No newline at end of file +end + +# ==================================================== +# +# Tests +# +# ===================================================== + +@testitem "CubeSampling throws exception with too few points" begin + @test_throws TooFewSites CubeSampling(numsites = -1) + @test_throws TooFewSites CubeSampling(numsites = 0) + @test_throws TooFewSites CubeSampling(numsites = 1) +end diff --git a/src/exceptions.jl b/src/exceptions.jl new file mode 100644 index 0000000..5894a0f --- /dev/null +++ b/src/exceptions.jl @@ -0,0 +1,29 @@ +abstract type BONException <: Base.Exception end +abstract type SeederException <: BONException end + +Base.showerror(io::IO, e::E) where {E <: BONException} = + tprint( + io, + "{bold red}$(supertype(E)){/bold red} | {bold yellow}$(e.message){/bold yellow}\n", + ) + +function _check_arguments(sampler::BONSampler) + sampler.numsites > 1 || throw(TooFewSites()) + return nothing +end + +@kwdef struct TooFewSites <: BONException + message = "Number of sites to select must be at least two." +end +function check(::Type{TooFewSites}, sampler) + sampler.numsites > 1 || throw(TooFewSites()) + return nothing +end + +@kwdef struct TooManySites <: BONException + message = "Cannot select more sites than there are candidates." 
+end +function check(::Type{TooManySites}, sampler) + sampler.numsites <= maxsites(sampler) || throw(TooManySites()) + return nothing +end diff --git a/src/fractaltriad.jl b/src/fractaltriad.jl index 261feda..abc8172 100644 --- a/src/fractaltriad.jl +++ b/src/fractaltriad.jl @@ -1,10 +1,189 @@ -Base.@kwdef struct FractalTriad{IT <: Integer, FT <: AbstractFloat} <: SpatialSampler - numpoints::IT = 50 - padding::FT = 0.1 +""" + FractalTriad + +A `BONSampler` that generates `FractalTriad` designs +""" +Base.@kwdef struct FractalTriad{I <: Integer, F <: AbstractFloat} <: BONSampler + numsites::I = 81 + horizontal_padding::F = 0.1 + vertical_padding::F = 0.1 + dims::Tuple{I, I} = (100, 100) + function FractalTriad(numsites, horizontal_padding, vertical_padding, dims) + ft = new{typeof(numsites), typeof(horizontal_padding)}( + numsites, + horizontal_padding, + vertical_padding, + dims, + ) + check_arguments(ft) + return ft + end +end +_default_pool(ft::FractalTriad) = pool(ft.dims) + + +maxsites(ft::FractalTriad) = maximum(ft.dims) * 10 # gets numerically unstable for large values because float coords belong to the same cell in the raster, and arctan breaks +function check_arguments(ft::FractalTriad) + check(TooManySites, ft) + ft.numsites > 9 || throw(TooFewSites("FractalTriad requires more than 9 sites")) + ft.numsites % 9 == 0 || + throw(ArgumentError("FractalTriad requires number of sites to be a multiple of 9")) + return end -function _generate!(ft::FractalTriad, sdm::M) where {M <: AbstractMatrix} - response = zeros(ft.numpoints, 2) +""" + _outer_triangle + +Takes a FractalTriad generator `ft` and returns the outermost triangle based on the size of the raster and the padding on the horizontal and vertical. +""" +function _outer_triangle(ft) + x, y = ft.dims + Δx, Δy = + Int.([floor(0.5 * ft.horizontal_padding * x), floor(0.5 * ft.vertical_padding * y)]) + + return CartesianIndex.([(Δx, Δy), (x ÷ 2, y - Δy), (x - Δx, Δy)]) +end + +""" + _hexagon(A, B, C) + +Takes a set of `CartesianIndex`es that form the vertices of a triangle, and returns the `CartesianIndex`es of the points that form the internal hexagon + +For input vertices A, B, C, `_hexagon` returns the points on the edges of the triangle below in the order `[d,e,f,g,h,i]` + + B + + e f + + + d g + + A i h C + +-- + +After running `vcat(triangle, hex)`, the resulting indices form the 2-level triad with indices corresponding to points in the below manner: + + 2 + + + 5 6 + + + 4 7 + + 1 9 8 3 + + - +γ = |AB| +χ = |BC| + +θ = ⦤ BAC +α = ⦤ BCA + +TODO: this always assumes |AC| is horizontal. This could be changed later. +""" +function _hexagon(A, B, C) + γ = sqrt((B[1] - A[1])^2 + (B[2] - A[2])^2) # left side length + χ = sqrt((B[1] - C[1])^2 + (B[2] - C[2])^2) # right side length + + θ = atan((B[2] - A[2]) / (B[1] - A[1])) + α = atan((B[2] - C[2]) / (C[1] - B[1])) + + d = (A[1] + (γ * cos(θ) / 3), A[2] + γ * sin(θ) / 3) + e = (A[1] + 2γ * cos(θ) / 3, A[2] + 2γ * sin(θ) / 3) + f = (A[1] + (C[1] - A[1]) - (2χ * cos(α) / 3), A[2] + 2χ * sin(α) / 3) + g = (A[1] + (C[1] - A[1] - (χ * cos(α) / 3)), A[2] + χ * sin(α) / 3) + h = (A[1] + 2(C[1] - A[1]) / 3, A[2]) + i = (A[1] + (C[1] - A[1]) / 3, A[2]) + return [CartesianIndex(Int.([floor(x[1]), floor(x[2])])...) for x in [d, e, f, g, h, i]] +end + +""" + _fill_triangle!(coords, triangle, count) + +Takes a set of vertices of a triangle `triangle`, and fills the internal hexagon for those points.
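+Returns a tuple of the newly filled slice of `coords` and the updated counter.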
+""" +function _fill_triangle!(coords, triangle, count) + start = count + hex = _hexagon(triangle...) + for i in eachindex(hex) + coords[count] = CartesianIndex(hex[i]) + count += 1 + if count > length(coords) + return coords[start:(count - 1)], count + end + end + + return coords[start:(count - 1)], count +end + +""" + _generate!(coords::Vector{CartesianIndex}, ft::FractalTriad) + +Fills `coords` with a set of points generated using the `FractalTriad` generator `ft`. +""" +function _sample!( + coords::S, + candidates::C, + ft::FractalTriad, +) where {S<:Sites,C<:Sites} + base_triangle = _outer_triangle(ft) + + coords.coordinates .= CartesianIndex(1,1) + + coords.coordinates[1:3] .= base_triangle + count = 4 + + triangle = coords[1:3] + hex, count = _fill_triangle!(coords, triangle, count) + pack = vcat(triangle, hex) + vert_idxs = [[5, 2, 6], [1, 4, 9], [8, 7, 3]] + + pack_stack = [pack] + + while length(pack_stack) > 0 + pack = popat!(pack_stack, 1) + for idx in vert_idxs + triangle = pack[idx] + hex, count = _fill_triangle!(coords, triangle, count) + if count > ft.numsites + return coords + end + push!(pack_stack, vcat(triangle, hex)) + end + end + return coords +end + +# ==================================================== +# +# Tests +# +# ===================================================== + +@testitem "FractalTriad is correct subtype" begin + @test FractalTriad <: BONSampler +end + +@testitem "FractalTriad default constructor works" begin + @test typeof(FractalTriad()) <: FractalTriad +end + +@testitem "FractalTriad can change number of sites with keyword argument" begin + ft = FractalTriad(; numsites = 18) + @test typeof(ft) <: FractalTriad + @test ft.numsites == 18 + + ft = FractalTriad(; numsites = 27) + @test typeof(ft) <: FractalTriad + @test ft.numsites == 27 + + @test_throws ArgumentError FractalTriad(numsites = 20) +end - return response +@testitem "FractalTriad throws error when too few points as passed as an argument" begin + @test_throws TooFewSites FractalTriad(; numsites = 9) + @test_throws TooFewSites FractalTriad(; numsites = -1) + @test_throws TooFewSites FractalTriad(; numsites = 0) end diff --git a/src/grts.jl b/src/grts.jl new file mode 100644 index 0000000..9551aa9 --- /dev/null +++ b/src/grts.jl @@ -0,0 +1,72 @@ +""" + GeneralizedRandomTessellatedStratified + +@Olsen +""" +@kwdef struct GeneralizedRandomTessellatedStratified <: BONSampler + numsites = 50 + dims = (100, 100) +end + +maxsites(grts::GeneralizedRandomTessellatedStratified) = prod(grts.dims) + +function check_arguments(grts::GeneralizedRandomTessellatedStratified) + check(TooManySites, grts) + check(TooFewSites, grts) + return +end + +function _quadrant_fill!(mat) + x, y = size(mat) + a, b = x ÷ 2, y ÷ 2 + quad_ids = shuffle([1, 2, 3, 4]) + mat[begin:a, begin:b] .= quad_ids[1] + mat[(a + 1):end, begin:b] .= quad_ids[2] + mat[begin:a, (b + 1):end] .= quad_ids[3] + mat[(a + 1):end, (b + 1):end] .= quad_ids[4] + return mat +end + +function _quadrant_split!(mat, grid_size) + x, y = size(mat) + + num_x_grids, num_y_grids = x ÷ grid_size, y ÷ grid_size + + for i in 0:(num_x_grids - 1), j in 0:(num_y_grids - 1) + a, b = i * grid_size, j * grid_size + bounds = (a + 1, b + 1), (a + grid_size, b + grid_size) + mat[bounds[1][1]:bounds[2][1], bounds[1][2]:bounds[2][2]] .= + _quadrant_fill!(mat[bounds[1][1]:bounds[2][1], bounds[1][2]:bounds[2][2]]) + end + + return mat +end + +""" +_generate!(coords::Vector{CartesianIndex}, grts::GeneralizedRandomTessellatedStratified) +""" +function _generate!( + 
coords::Vector{CartesianIndex}, + grts::GeneralizedRandomTessellatedStratified, +) + x, y = grts.dims # smallest multiple of 4 on each side + num_address_grids = Int(ceil(max(log(2, x), log(2, y)))) + + grid_sizes = reverse([2^i for i in 1:num_address_grids]) + + address_grids = + [zeros(Int, 2^num_address_grids, 2^num_address_grids) for _ in grid_sizes] + + map( + i -> _quadrant_split!(address_grids[i], grid_sizes[i]), + eachindex(address_grids), + ) + + code_numbers = sum([10^(i - 1) .* ag for (i, ag) in enumerate(address_grids)]) + sort_idx = sortperm([code_numbers[cidx] for cidx in eachindex(code_numbers)]) + + return filter( + idx -> idx[1] <= grts.dims[1] && idx[2] <= grts.dims[2], + CartesianIndices(code_numbers)[sort_idx], + )[1:(grts.numsites)] +end diff --git a/src/integrations/simplesdms.jl b/src/integrations/simplesdms.jl deleted file mode 100644 index be621f9..0000000 --- a/src/integrations/simplesdms.jl +++ /dev/null @@ -1,11 +0,0 @@ -@info "Loading BONs.jl support for SimpleSDMLayers.jl ..." - -function BiodiversityObservationNetworks.stack(layers::Vector{<:SpeciesDistributionToolkit.SimpleSDMLayers.SimpleSDMLayer}) - # assert all layers are the same size if we add this function to BONs.jl - mat = zeros(size(first(layers))..., length(layers)) - for (l,layer) in enumerate(layers) - thismin, thismax = extrema(layer) - mat[:,:,l] .= broadcast(x->isnothing(x) ? NaN : (x-thismin)/(thismax-thismin), layer.grid) - end - mat -end diff --git a/src/integrations/zygote.jl b/src/integrations/zygote.jl deleted file mode 100644 index 0a70496..0000000 --- a/src/integrations/zygote.jl +++ /dev/null @@ -1,36 +0,0 @@ -@info "Loading BONs.jl support for Zygote.jl ..." - - -function BiodiversityObservationNetworks.optimize(layers, loss; targets = 3, learningrate = 1e-4, numsteps = 10) - ndims(layers) == 3 || throw(ArgumentError("Layers must be a 3-dimensional array")) - typeof(loss) <: Function || throw(ArgumentError("`loss` must be a function")) - learningrate > 0.0 || throw(ArgumentError("learningrate must be positive")) - numsteps > 0 || throw(ArgumentError("numsteps must be positive")) - - W = rand(size(layers, 3), targets) - for (i,c) in enumerate(eachcol(W)) - W[:,i] .= c ./ sum(c) - end - - α = rand(targets) - α ./= sum(α) - - losses = zeros(numsteps) - - @showprogress for step in 1:numsteps - dL, ∂W, ∂α = learningrate .* Zygote.gradient(loss, layers, W, α) - W += ∂W - α += ∂α - - W = clamp.(W, 0, 1) - for (i,c) in enumerate(eachcol(W)) - W[:,i] .= c ./ sum(c) - end - - α = clamp.(α, 0, 1) - α ./= sum(α) - - losses[step] = loss(layers, W, α) - end - return W,α,losses -end diff --git a/src/optimize.jl b/src/optimize.jl deleted file mode 100644 index c12e1ba..0000000 --- a/src/optimize.jl +++ /dev/null @@ -1,33 +0,0 @@ -""" - What should the optimization API look like? - - - We have a bunch of rasters that are layers that we - can to combine in a layer 'stack'. - - We have a weights matrix W which has `r` rows and - `t` columns, where `r` is the number of layers in the stack, - and `t` is the number of optimization targets. - - We have a vector α of length `t` which sums to 1. - - We want a function `optimize` which takes - (a) a combined Seeder/Refiner - (b) an initial W and α - (c) hyperparameters for optimization - (d) a loss function comparing the sampled outcome to the 'true' state - - and uses Zygote's AD to optimize W and α - to reduce a loss function that describes the - difference between the "true" metaweb and - sampled one. 
- - In our context, the two targets are interaction classification - and network topology, so we want a loss function that combines - measures of these elements. -""" - - -function optimize() - missing -end \ No newline at end of file diff --git a/src/refine.jl b/src/refine.jl index 95ba701..cb23c96 100644 --- a/src/refine.jl +++ b/src/refine.jl @@ -1,19 +1,21 @@ +# No longer used + + """ - refine!(cooords::Vector{CartesianIndex}, pool::Vector{CartesianIndex}, sampler::ST, uncertainty::Matrix{T}) + refine!(coords::Vector{CartesianIndex}, pool::Vector{CartesianIndex}, sampler::ST) Refines a set of candidate sampling locations in the preallocated vector `coords` -from a vector of coordinates `pool` using `sampler`, where `sampler` is a [`BONRefiner`](@ref). +from a vector of coordinates `pool` using `sampler`, where `sampler` is a [`BONRefiner`](@ref). """ function refine!( coords::Vector{CartesianIndex}, pool::Vector{CartesianIndex}, sampler::ST, - uncertainty::Matrix{T} -) where {ST <: BONRefiner, T <: AbstractFloat, N} - if length(coords) != sampler.numpoints +) where {ST <: BONRefiner} + if length(coords) != sampler.numsites throw( DimensionMismatch( - "The length of the coordinate vector must match the `numpoints` fields of the sampler", + "The length of the coordinate vector must match the `numsites` field of the sampler", ), ) end @@ -24,62 +26,47 @@ function refine!( ), ) end - return _generate!(coords, copy(pool), sampler, uncertainty) + return _generate!(coords, copy(pool), sampler) end """ - refine!(cooords::Vector{CartesianIndex}, pool::Vector{CartesianIndex}, sampler::ST, uncertainty::Matrix{T}) + refine!(coords::Vector{CartesianIndex}, pool::Vector{CartesianIndex}, sampler::ST) The curried version of `refine!`, which returns a function that acts on the input coordinate pool passed to the curried function (`p` below). """ function refine!(coords::Vector{CartesianIndex}, sampler::ST) where {ST <: BONRefiner} - if length(coords) != sampler.numpoints + if length(coords) != sampler.numsites throw( DimensionMismatch( - "The length of the coordinate vector must match the `numpoints` fields of the sampler", + "The length of the coordinate vector must match the `numsites` field of the sampler", ), ) end - return (p, u) -> refine!(coords, copy(p), sampler, u) + return (p) -> refine!(coords, copy(p), sampler) end """ - refine(pool::Vector{CartesianIndex}, sampler::ST, uncertainty::Matrix{T}) + refine(pool::Vector{CartesianIndex}, sampler::ST) -Refines a set of candidate sampling locations and returns a vector `coords` of length numpoints +Refines a set of candidate sampling locations and returns a vector `coords` of length numsites from a vector of coordinates `pool` using `sampler`, where `sampler` is a [`BONRefiner`](@ref). """ function refine( pool::Vector{CartesianIndex}, sampler::ST, - uncertainty::Matrix{T}, -) where {ST <: BONRefiner, T <: AbstractFloat} - coords = Vector{CartesianIndex}(undef, sampler.numpoints) - return refine!(coords, copy(pool), sampler, uncertainty) +) where {ST <: BONRefiner} + coords = Vector{CartesianIndex}(undef, sampler.numsites) + return refine!(coords, copy(pool), sampler) end """ refine(sampler::BONRefiner) -Returns a curried function of `refine` with *two* methods: both are using the -output of `seed`, one in its packed form, the other in its splatted form.
+Returns a curried function of `refine` """ function refine(sampler::ST) where {ST <: BONRefiner} - coords = Vector{CartesianIndex}(undef, sampler.numpoints) - _inner(p, u) = refine!(coords, copy(p), sampler, u) - _inner(p) = refine!(coords, first(p), sampler, last(p)) + coords = Vector{CartesianIndex}(undef, sampler.numsites) + _inner(p) = refine!(coords, first(p), sampler) return _inner end - -""" - refine(pack, sampler::BONRefiner) - -Calls `refine` on the appropriatedly splatted version of `pack`. -""" -function refine( - pack::Tuple{Vector{CartesianIndex}, Matrix{Float64}}, - sampler::ST, -) where {ST <: BONRefiner} - return refine(first(pack), sampler, last(pack)) -end \ No newline at end of file diff --git a/src/sample.jl b/src/sample.jl new file mode 100644 index 0000000..61c046e --- /dev/null +++ b/src/sample.jl @@ -0,0 +1,26 @@ + +function sample(alg::BONSampler) + _sample!( + _allocate_sites(numsites(alg)), + _default_pool(alg), + alg + ) +end + + +function sample(alg::BONSampler, l::L) where L<:Layer + _sample!( + _allocate_sites(numsites(alg)), + l, + alg + ) +end + + +function sample(alg::BONSampler, candidates::C) where C<:Sites + _sample!( + _allocate_sites(numsites(alg)), + candidates, + alg + ) +end diff --git a/src/seed.jl b/src/seed.jl deleted file mode 100644 index 055aadc..0000000 --- a/src/seed.jl +++ /dev/null @@ -1,64 +0,0 @@ -""" - seed!(coords::Vector{CartesianIndex}, sampler::ST, uncertainty::Matrix{T}) - -Puts a set of candidate sampling locations in the preallocated vector `coords` -from a raster `uncertainty` using `sampler`, where `sampler` is a [`BONSeeder`](@ref). - - - Seeder's work on rasters, refiners work on set of coordinates. -""" -function seed!( - coords::Vector{CartesianIndex}, - sampler::ST, - uncertainty::Matrix{T}, -) where {ST <: BONSeeder, T <: AbstractFloat} - if length(coords) != sampler.numpoints - throw( - DimensionMismatch( - "The length of the coordinate vector must match the `numpoints` fields of the sampler", - ), - ) - end - return _generate!(coords, sampler, uncertainty) -end - -""" - seed!(coords::Vector{CartesianIndex}, sampler::ST) - -The curried version of `seed!`, which returns a function that acts on the input -uncertainty layer passed to the curried function (`u` below). -""" -function seed!(coords::Vector{CartesianIndex}, sampler::ST) where {ST <: BONSeeder} - if length(coords) != sampler.numpoints - throw( - DimensionMismatch( - "The length of the coordinate vector must match the `numpoints` fields of the sampler", - ), - ) - end - return (u) -> seed!(coords, sampler, u) -end - -""" - seed(sampler::ST, uncertainty::Matrix{T}) - -Produces a set of candidate sampling locations in a vector `coords` of length numpoints -from a raster `uncertainty` using `sampler`, where `sampler` is a [`BONSeeder`](@ref). -""" -function seed( - sampler::ST, - uncertainty::Matrix{T}, -) where {ST <: BONSeeder, T <: AbstractFloat} - coords = Vector{CartesianIndex}(undef, sampler.numpoints) - return seed!(coords, sampler, uncertainty) -end - -""" - seed!(coords::Vector{CartesianIndex}, sampler::ST) - -The curried version of `seed!`, which returns a function that acts on the input -uncertainty layer passed to the curried function (`u` below). 
-""" -function seed(sampler::ST) where {ST <: BONSeeder} - coords = Vector{CartesianIndex}(undef, sampler.numpoints) - return (u) -> seed!(coords, sampler, u) -end diff --git a/src/simplerandom.jl b/src/simplerandom.jl index baa5ec8..ce853db 100644 --- a/src/simplerandom.jl +++ b/src/simplerandom.jl @@ -3,27 +3,64 @@ Implements Simple Random spatial sampling (each location has equal probability of selection) """ -Base.@kwdef struct SimpleRandom{I <: Integer} <: BONSeeder - numpoints::I = 50 - function SimpleRandom(numpoints) - if numpoints < one(numpoints) - throw( - ArgumentError( - "You cannot have a SimpleRandom seeder with fewer than one point", - ), - ) - end - return new{typeof(numpoints)}(numpoints) +Base.@kwdef struct SimpleRandom{I<:Integer} <: BONSampler + numsites::I = 30 + function SimpleRandom(numsites::I) where{I<:Integer} + srs = new{typeof(numsites)}(numsites) + check_arguments(srs) + return srs end end -function _generate!( - coords::Vector{CartesianIndex}, - sampler::SimpleRandom, - uncertainty::Matrix{T}, -) where {T <: AbstractFloat} - pool = CartesianIndices(uncertainty) +function check_arguments(srs::SRS) where {SRS <: SimpleRandom} + check(TooFewSites, srs) + return nothing +end + +_default_pool(::SimpleRandom) = pool((50,50)) + +function sample(::SimpleRandom, ::Layer) +end +function sample(::SimpleRandom, ::Matrix) +end +function sample(sr::SimpleRandom, T::Tuple{I,I}) where I <: Integer + +end + + + + +function _sample!( + selections::S, + candidates::C, + sampler::SimpleRandom{I}, +) where {S<:Sites,C<:Sites,I} + _coords = Distributions.sample(candidates.coordinates, sampler.numsites; replace = false) + for (i,c) in enumerate(_coords) + selections[i] = c + end + return selections +end + +# ==================================================== +# +# Tests +# +# ===================================================== + +@testitem "SimpleRandom constructor with no arguments works" begin + sr = SimpleRandom() + @test typeof(sr) <: SimpleRandom +end + +@testitem "SimpleRandom must have more than one point" begin + @test_throws TooFewSites SimpleRandom(numsites = -1) + @test_throws TooFewSites SimpleRandom(numsites = 0) + @test_throws TooFewSites SimpleRandom(numsites = 1) +end - coords .= sample(pool, sampler.numpoints; replace = false) - return (coords, uncertainty) +@testitem "SimpleRandom allows keyword arguments for number of points" begin + N = 314 + srs = SimpleRandom(; numsites = N) + @test srs.numsites == N end diff --git a/src/spatialstratified.jl b/src/spatialstratified.jl new file mode 100644 index 0000000..e702978 --- /dev/null +++ b/src/spatialstratified.jl @@ -0,0 +1,121 @@ +""" + SpatiallyStratified +""" +@kwdef struct SpatiallyStratified{I<:Integer,L<:Layer,F<:AbstractFloat,N} <: BONSampler + numsites::I = 50 + layer::L = _default_strata((50, 50)) + category_dict::Dict{I,N} = _default_categories() + inclusion_dict::Dict{N,F} = _default_inclusion() + function SpatiallyStratified( + numsites::I, + layer::L, + category_dict::Dict{I,N}, + inclusion_dict::Dict{N,F} + ) where {I<:Integer,L<:Layer,F<:AbstractFloat,N} + ss = new{I,L,F,N}( + numsites, + layer, + category_dict, + inclusion_dict + ) + check_arguments(ss) + return ss + end +end + + +function check_arguments(ss::SpatiallyStratified) + check(TooFewSites, ss) + + length(ss.category_dict) == length(ss.inclusion_dict) || throw( + ArgumentError( + "Inclusion probability vector does not have the same number of strata as there are unique values in the strata matrix", + ), + ) + + return 
sum(values(ss.inclusion_dict)) ≈ 1.0 || + throw(ArgumentError("Inclusion probabilities across all strata do not sum to 1.")) +end + + +function _sample!( + coords::S, + candidates::C, + sampler::SpatiallyStratified{I,L,F,N}, +) where {S<:Sites,C<:Sites,I,L,F,N} + n = sampler.numsites + strata = sampler.layer + categories = sampler.category_dict + + category_ids = sort(collect(keys(categories))) + candidate_ids = [strata.layer[x] for x in coordinates(candidates)] + + cat_idx = Dict() + inclusion_probs = F[] + for k in category_ids + cat_idx[k] = findall(isequal(k), candidate_ids) + if length(cat_idx[k]) > 0 + push!(inclusion_probs, sampler.inclusion_dict[categories[k]]) + else + push!(inclusion_probs, 0) + end + end + + inclusion_probs ./= sum(inclusion_probs) + + # check if there are empty categories, if so set incl prob to 0 and + # renormalize? + selected_cats = rand(Categorical(inclusion_probs), n) + for (i,c) in enumerate(selected_cats) + if length(cat_idx[c]) > 0 + coords[i] = candidates[rand(cat_idx[c])] + end + end + return coords +end + +# Utils +_default_pool(::SpatiallyStratified) = pool(_default_strata((50,50))) +_default_categories() = Dict{Int,String}(1=>"A", 2=>"B", 3=>"C") +_default_inclusion() = Dict{String,Float64}("A"=>0.5, "B"=>0.3, "C"=>0.2) + +function _default_strata(sz) + mat = zeros(typeof(sz[1]), sz...) + + x = sz[1] ÷ 2 + y = sz[2] ÷ 3 + + mat[begin:x, :] .= 1 + mat[(x + 1):end, begin:y] .= 2 + mat[(x + 1):end, (y + 1):end] .= 3 + + return Layer(mat) +end + +# ==================================================== +# +# Tests +# +# ===================================================== + +@testitem "SpatiallyStratified default constructor works" begin + @test typeof(SpatiallyStratified()) <: SpatiallyStratified +end + +@testitem "SpatiallyStratified with default arguments can generate points" begin + ss = SpatiallyStratified() + coords = sample(ss) + @test typeof(coords) <: Sites +end + +@testitem "SpatiallyStratified throws error when number of sites is below 2" begin + @test_throws TooFewSites SpatiallyStratified(numsites = -1) + @test_throws TooFewSites SpatiallyStratified(numsites = 0) + @test_throws TooFewSites SpatiallyStratified(numsites = 1) +end + +@testitem "SpatiallyStratified can use custom number of points as keyword argument" begin + NUM_POINTS = 42 + ss = SpatiallyStratified(; numsites = NUM_POINTS) + @test ss.numsites == NUM_POINTS +end diff --git a/src/types.jl b/src/types.jl index 7bfb39b..358594a 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,24 +1,64 @@ """ - abstract type BONSeeder end + abstract type BONSampler end -A `BONSeeder` is an algorithm for proposing sampling locations using a raster of -weights, represented as a matrix, in each cell. +A `BONSampler` is any algorithm for proposing a set of sampling locations. """ -abstract type BONSeeder end +abstract type BONSampler end -""" - abstract type BONRefiner end +numsites(s::BONSampler) = s.numsites -A `BONRefiner` is an algorithm for proposing sampling locations by _refining_ a -set of candidate points to a smaller set of 'best' points. """ -abstract type BONRefiner end + mutable struct Sites{T} """ - BONSampler +mutable struct Sites{T} + coordinates::Vector{T} +end +_allocate_sites(n) = Sites(Array{CartesianIndex}(undef, n)) +coordinates(s::Sites) = s.coordinates +Base.getindex(s::Sites, i::Integer) = getindex(coordinates(s), i) +Base.getindex(s::Sites, i::UnitRange) = getindex(coordinates(s), i) -A union of the abstract types `BONSeeder` and `BONRefiner`. 
diff --git a/src/types.jl b/src/types.jl
index 7bfb39b..358594a 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -1,24 +1,64 @@
 """
-    abstract type BONSeeder end
+    abstract type BONSampler end
 
-A `BONSeeder` is an algorithm for proposing sampling locations using a raster of
-weights, represented as a matrix, in each cell.
+A `BONSampler` is any algorithm for proposing a set of sampling locations.
 """
-abstract type BONSeeder end
+abstract type BONSampler end
 
-"""
-    abstract type BONRefiner end
+numsites(s::BONSampler) = s.numsites
 
-A `BONRefiner` is an algorithm for proposing sampling locations by _refining_ a
-set of candidate points to a smaller set of 'best' points.
 """
-abstract type BONRefiner end
+    mutable struct Sites{T}
+
+A wrapper around a vector of coordinates, used to represent both candidate
+pools and selected sampling locations.
 """
-    BONSampler
+mutable struct Sites{T}
+    coordinates::Vector{T}
+end
+_allocate_sites(n) = Sites(Array{CartesianIndex}(undef, n))
+coordinates(s::Sites) = s.coordinates
+Base.getindex(s::Sites, i::Integer) = getindex(coordinates(s), i)
+Base.getindex(s::Sites, i::UnitRange) = getindex(coordinates(s), i)
 
-A union of the abstract types `BONSeeder` and `BONRefiner`. Both types return a
-tuple with the coordinates as a vector of `CartesianIndex`, and the weight
-matrix as a `Matrix` of `AbstractFloat`, in that order.
-"""
-const BONSampler = Union{BONSeeder, BONRefiner}
+Base.setindex!(s::Sites, v, i::Integer) = setindex!(coordinates(s), v, i)
+Base.length(s::Sites) = length(coordinates(s))
+Base.eachindex(s::Sites) = eachindex(s.coordinates)
+
+abstract type LayerType end
+
+abstract type DataLayer <: LayerType end
+struct CategoricalData <: DataLayer end
+struct ContinuousData <: DataLayer end
+
+abstract type InclusionProbability <: LayerType end
+abstract type ResultLayer <: LayerType end
+
+# Result layers: a distribution over categorical values at each pixel, or a
+# scalar at each pixel
+struct CategoricalResult <: LayerType end
+struct ContinuousResult <: LayerType end
+
+# Layer
+struct Layer{T <: LayerType, L}
+    layer::L
+end
+Base.size(l::Layer) = size(l.layer)
+function Layer(l::S; categorical = false) where {S <: SimpleSDMLayers.SDMLayer}
+    T = categorical ? CategoricalData : ContinuousData
+    return Layer{T, S}(l)
+end
+Layer(m::Matrix{I}) where {I <: Integer} = Layer{CategoricalData, typeof(m)}(m)
+Layer(m::Matrix{R}) where {R <: Real} = Layer{ContinuousData, typeof(m)}(m)
+
+_indices(m::M) where {M <: Matrix} =
+    findall(i -> !isnothing(m[i]) && !isnan(m[i]) && !ismissing(m[i]), CartesianIndices(m))
+_indices(l::SDMLayer) = findall(l.indices)
+
+# Pool methods
+pool(l::Layer) = Sites(vec(_indices(l.layer)))
+pool(dims::Tuple{I, I}) where {I <: Integer} = Sites(vec(collect(CartesianIndices(dims))))
+
+# Layer set w/ names
+struct Stack{T <: LayerType, N, L}
+    layers::Dict{N, Layer{T, L}}
+end
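The new `Layer`/`Sites` machinery can be exercised on its own. A short sketch, assuming the constructors above are reachable from the package namespace:

```julia
using BiodiversityObservationNetworks
const BON = BiodiversityObservationNetworks

strata = BON.Layer(rand(1:3, 50, 50))   # integer matrix => Layer{CategoricalData}
temperature = BON.Layer(rand(50, 50))   # float matrix   => Layer{ContinuousData}

candidates = BON.pool(temperature)      # Sites over every usable (non-NaN) cell
length(candidates)                      # == 2500
```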
diff --git a/src/uniqueness.jl b/src/uniqueness.jl
index c6aace3..9b5033d 100644
--- a/src/uniqueness.jl
+++ b/src/uniqueness.jl
@@ -1,56 +1,83 @@
 """
     Uniqueness
 
-A `BONRefiner`.
+A `BONSampler` that selects the sites whose environmental values are most
+distinct from those of the rest of the candidate pool.
 """
-Base.@kwdef mutable struct Uniqueness{I <: Integer, T<:Number} <: BONRefiner
-    numpoints::I = 30
-    layers::Array{T,3}
-    function Uniqueness(numpoints, layers::Array{T,N}) where {T,N}
-        if numpoints < one(numpoints)
-            throw(
-                ArgumentError(
-                    "You cannot have a Uniqueness sampler with less than one point",
-                ),
-            )
-        end
-        if N != 3
-            throw(
-                ArgumentError(
-                    "You cannot have a Uniqueness sampler without layers passed as a cube.",
-                ),
-            )
-        end
-        return new{typeof(numpoints),T}(numpoints, layers)
+Base.@kwdef struct Uniqueness{I <: Integer, T <: AbstractFloat} <: BONSampler
+    numsites::I = 30
+    layers::Array{T, 3} = rand(50, 50, 3)
+    function Uniqueness(numsites, layers)
+        uniq = new{typeof(numsites), typeof(layers[begin])}(numsites, layers)
+        check_arguments(uniq)
+        return uniq
     end
 end
+maxsites(uniq::Uniqueness) = prod(size(uniq.layers)[1:2])
+
+function check_arguments(uniq::Uniqueness)
+    check(TooFewSites, uniq)
+    check(TooManySites, uniq)
+
+    length(size(uniq.layers)) == 3 ||
+        throw(
+            ArgumentError(
+                "You cannot have a Uniqueness sampler without layers passed as a 3-dimensional array, where the first two axes are latitude and longitude.",
+            ),
+        )
+
+    return nothing
+end
 
 function _generate!(
     coords::Vector{CartesianIndex},
     pool::Vector{CartesianIndex},
     sampler::Uniqueness,
-    uncertainty,
-) where {T <: AbstractFloat}
+)
     layers = sampler.layers
-    ndims(layers) <= 2 && throw(ArgumentError("Uniqueness needs more than one layer to work."))
-    size(uncertainty) != (size(layers,1), size(layers,2)) && throw(DimensionMismatch("Layers are not the same dimension as uncertainty"))
+    ndims(layers) <= 2 &&
+        throw(ArgumentError("Uniqueness needs more than one layer to work."))
 
-
-    covscore = zeros(length(pool))
-    for (i,p1) in enumerate(pool)
+    # Score each candidate by its summed |covariance| with every other
+    # candidate; low scores mark environmentally distinct sites.
+    total_covariance = zeros(length(pool))
+    for (i, p1) in enumerate(pool)
         v1 = layers[p1[1], p1[2], :]
-        for (j,p2) in enumerate(pool)
-            v2 = layers[p2[1], p2[2],:]
+        for (j, p2) in enumerate(pool)
+            v2 = layers[p2[1], p2[2], :]
             if p1 != p2
-                covscore[i] += abs(cov(v1,v2))
+                total_covariance[i] += abs(cov(v1, v2))
             end
         end
     end
-    np = sampler.numpoints
-    sortedvals = sortperm(vec(covscore))
-
+    np = sampler.numsites
+    sortedvals = sortperm(vec(total_covariance))
+
     coords[:] .= pool[sortedvals[1:np]]
-    return (coords, uncertainty)
-end
\ No newline at end of file
+    return coords
+end
+
+# ====================================================
+#
+#   Tests
+#
+# ====================================================
+
+@testitem "Uniqueness default constructor works" begin
+    @test typeof(Uniqueness()) <: Uniqueness
+end
+
+@testitem "Uniqueness requires more than one point" begin
+    @test_throws TooFewSites Uniqueness(numsites = -1)
+    @test_throws TooFewSites Uniqueness(numsites = 0)
+    @test_throws TooFewSites Uniqueness(numsites = 1)
+end
+
+@testitem "Uniqueness throws error if more points are requested than are possible" begin
+    @test_throws TooManySites Uniqueness(numsites = 26, layers = rand(5, 5, 2))
+end
+
+@testitem "Uniqueness works with positional constructor" begin
+    @test typeof(Uniqueness(2, rand(5, 5, 5))) <: Uniqueness
+end
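For intuition about what `Uniqueness` optimizes, here is the scoring loop above applied by hand on a small, made-up candidate pool:

```julia
using Statistics  # for cov

layers = rand(10, 10, 3)
candidates = vec(collect(CartesianIndices((10, 10))))

# Each candidate's score is the summed |covariance| between its layer values
# and those of every other candidate; the lowest scores are the most unique.
score(p) = sum(abs(cov(layers[p[1], p[2], :], layers[q[1], q[2], :]))
               for q in candidates if q != p)
most_unique = candidates[sortperm(score.(candidates))[1:5]]
```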
diff --git a/src/utils.jl b/src/utils.jl
index 5fb2b59..d856673 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -2,64 +2,12 @@
 function stack(layers::Vector{<:AbstractMatrix})
     # assert all layers are the same size if we add this function to BONs.jl
     mat = zeros(size(first(layers))..., length(layers))
-    for (l,layer) in enumerate(layers)
+    for (l, layer) in enumerate(layers)
         nonnanvals = vec(layer[findall(!isnan, layer)])
         thismin, thismax = findmin(nonnanvals)[1], findmax(nonnanvals)[1]
-        mat[:,:,l] .= broadcast(x->isnan(x) ? NaN : (x-thismin)/(thismax-thismin), layer)
+        mat[:, :, l] .=
+            broadcast(x -> isnan(x) ? NaN : (x - thismin) / (thismax - thismin), layer)
     end
-    mat
+    return mat
 end
-
-function _squish(layers::Array{T, 3}, W::Matrix{T}) where {T <: AbstractFloat}
-    size(W,1) == size(layers,3) || throw(ArgumentError("W does not have the same number of rows are there are number of layers"))
-    all([sum(c) ≈ 1 for c in eachcol(W)]) || throw(ArgumentError("Not all of the columns of W sum to 1."))
-
-    return convert(Array, slicemap(x -> x * W, layers; dims = (2, 3)))
-end
-
-function _squish(layers::Array{T, 3}, α::Vector{T}) where {T <: AbstractFloat}
-    length(α) == size(layers,3) || throw(ArgumentError("α is not the same length as number of layers"))
-    sum(α) ≈ 1 || throw(ArgumentError("α does not sum to 1.0"))
-    return slicemap(x -> x * reshape(α, (length(α), 1)), layers; dims = (2, 3))[:, :, 1]
-end
-
-"""
-    squish(layers, W, α)
-
-Takes a set of `n` layers and squishes them down
-to a single layer.
-
-
-    numcolumns = size(W,2)
-    for i in 1:numcolumns
-        W[:,i] ./= sum(W[:,i])
-    end
-
-For a coordinate in the raster (i,j), denote the vector of
-values across all locations at that coordinate v⃗ᵢⱼ. The value
-at that coordinate in squished layer, s⃗ᵢⱼ, is computed in two steps.
-
-**(1):** First we apply a weights matrix, `W``, with `n` rows and `m` columns (`m` < `n`), to
-reduce the initial `n` layers down to a set of `m` layers, each of which corresponds
-to a particular target of optimization. For example, we may want to propose sampling
-locations that are optimized to best sample abalance multiple criteria, like (a) the
-current distribution of a species and (b) if that distribution is changing over time.
-
-Each entry in the weights matrix `W` corresponds to the 'importance' of the layer
-in the corresponding row to the successful measurement of the target of the corresponding
-column. As such, each column of `W` must sum to 1.0.
-using Optim
-
-For each location, the value of the condensed layer `tᵢ`, corresponding to target `i`, at
-coordinate (i,j) is given by the dot product of v⃗ᵢⱼ and the `i`-th column of `W`.
-
-**(2):** Apply a weighted average across each target layer. To produce the final output layer,
-we apply a weighted average to each target layer, where the weights are provided in the vector α⃗
-of length `m`.
-
-The final value of the squished layer at (i,j) is given by s⃗ᵢⱼ = ∑ₓ αₓ*tᵢⱼ(x), where tᵢⱼ(x) is
-the value of the x-th target layer at (i,j).
-"""
-squish(layers, W, α) = _squish(_squish(layers, W), α)
-
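A quick demonstration of the min–max rescaling `stack` performs: each layer is normalized to [0, 1] independently, with NaN cells preserved. The inputs below are arbitrary:

```julia
using BiodiversityObservationNetworks

layers = [rand(10, 10) .* 100, rand(10, 10) .- 0.5]
cube = BiodiversityObservationNetworks.stack(layers)

extrema(cube[:, :, 1])   # ≈ (0.0, 1.0), regardless of the input scale
size(cube)               # (10, 10, 2)
```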
+""" +Base.@kwdef struct WeightedBalancedAcceptance{I <: Integer, F <: Real} <: BONSampler + numsites::I = 30 + α::F = 1.0 + function WeightedBalancedAcceptance(numsites, α) + wbas = new{typeof(numsites), typeof(α)}(numsites, α) + check_arguments(wbas) + return wbas + end +end + + +function check_arguments(wbas::WeightedBalancedAcceptance) + check(TooFewSites, wbas) + wbas.α > 0 || + throw( + ArgumentError("WeightedBalancedAcceptance requires α to be greater than 0 "), + ) + return nothing +end + +function _sample!( + coords::S, + candidates::C, + sampler::WeightedBalancedAcceptance{I, T}, + weights::L +) where {S<:Sites,C<:Sites,I <: Integer, T <: AbstractFloat,L<:Layer} + seed = rand(I.(1e0:1e7), 2) + α = sampler.α + x, y = size(weights) + + nonnan_indices = findall(!isnan, weights) + stduncert = similar(weights) + + uncert_values = weights[nonnan_indices] + stduncert_values = similar(uncert_values) + zfit = nothing + if var(uncert_values) > 0 + zfit = StatsBase.fit(ZScoreTransform, uncert_values) + stduncert_values = StatsBase.transform(zfit, uncert_values) + end + + nonnan_counter = 1 + for i in eachindex(weights) + if isnan(weights[i]) + stduncert[i] = NaN + elseif !isnothing(zfit) + stduncert[i] = stduncert_values[nonnan_counter] + nonnan_counter += 1 + else + stduncert[i] = 1.0 + end + end + + reluncert = broadcast(x -> isnan(x) ? NaN : exp(α * x) / (1 + exp(α * x)), stduncert) + ptct = 1 + addedpts = 1 + while addedpts <= length(coords) + i, j = haltonvalue(seed[1] + ptct, 2), haltonvalue(seed[2] + ptct, 3) + candcoord = CartesianIndex(convert.(Int32, [ceil(x * i), ceil(y * j)])...) + prob = reluncert[candcoord] + if !isnan(prob) && rand() < prob + coords[addedpts] = candcoord + addedpts += 1 + end + ptct += 1 + end + + return coords +end + +# ==================================================== +# +# Tests +# +# ===================================================== + +@testitem "WeightedBalancedAcceptance default constructor works" begin + @test typeof(WeightedBalancedAcceptance()) <: WeightedBalancedAcceptance +end + +@testitem "WeightedBalancedAcceptance requires positive number of sites" begin + @test_throws TooFewSites WeightedBalancedAcceptance(numsites = 0) + @test_throws TooFewSites WeightedBalancedAcceptance(numsites = 1) +end + +@testitem "WeightedBalancedAcceptance can take bias parameter α as keyword argument" begin + α = 3.14159 + wbas = WeightedBalancedAcceptance(; α = α) + @test wbas.α == α +end + +@testitem "WeightedBalancedAcceptance can take number of points as keyword argument" begin + N = 40 + wbas = WeightedBalancedAcceptance(; numsites = N) + @test wbas.numsites == N +end diff --git a/test/Project.toml b/test/Project.toml index 6381020..31a7127 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -4,4 +4,4 @@ NeutralLandscapes = "71847384-8354-4223-ac08-659a5128069f" SpeciesDistributionToolkit = "72b53823-5c0b-4575-ad0e-8e97227ad13b" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" +TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" diff --git a/test/adaptivespatial.jl b/test/adaptivespatial.jl index 0be95ca..945ea18 100644 --- a/test/adaptivespatial.jl +++ b/test/adaptivespatial.jl @@ -12,29 +12,29 @@ using Test # Test with a random uncertainty matrix U = rand(20, 20) -pack = seed(BalancedAcceptance(; numpoints = 30), U) +pack = seed(BalancedAcceptance(; numsites = 30), U) c = Vector{CartesianIndex}(undef, 15) smpl = AdaptiveSpatial(length(c)) # Length 
diff --git a/test/Project.toml b/test/Project.toml
index 6381020..31a7127 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -4,4 +4,4 @@ NeutralLandscapes = "71847384-8354-4223-ac08-659a5128069f"
 SpeciesDistributionToolkit = "72b53823-5c0b-4575-ad0e-8e97227ad13b"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
diff --git a/test/adaptivespatial.jl b/test/adaptivespatial.jl
index 0be95ca..945ea18 100644
--- a/test/adaptivespatial.jl
+++ b/test/adaptivespatial.jl
@@ -12,29 +12,29 @@
 # Test with a random uncertainty matrix
 U = rand(20, 20)
-pack = seed(BalancedAcceptance(; numpoints = 30), U)
+pack = seed(BalancedAcceptance(; numsites = 30), U)
 
 c = Vector{CartesianIndex}(undef, 15)
 smpl = AdaptiveSpatial(length(c))
 
 # Length and element type
-@test length(first(refine(pack, smpl))) == smpl.numpoints
+@test length(first(refine(pack, smpl))) == smpl.numsites
 @test eltype(first(refine(first(pack), smpl, U))) == CartesianIndex
 
 # Test with an existing coordinates vector
 @test_throws DimensionMismatch refine!(
     c,
     first(pack),
-    AdaptiveSpatial(; numpoints = length(c) - 1),
+    AdaptiveSpatial(; numsites = length(c) - 1),
     last(pack),
 )
 
 # Test the curried version
-@test length(first(refine(AdaptiveSpatial(; numpoints = 12))(pack...))) == 12
+@test length(first(refine(AdaptiveSpatial(; numsites = 12))(pack...))) == 12
 
 # Test the curried allocating version
-@test length(first(refine!(c, AdaptiveSpatial(; numpoints = length(c)))(pack...))) ==
+@test length(first(refine!(c, AdaptiveSpatial(; numsites = length(c)))(pack...))) ==
       length(c)
-@test_throws DimensionMismatch refine!(c, AdaptiveSpatial(; numpoints = length(c) - 1))(
+@test_throws DimensionMismatch refine!(c, AdaptiveSpatial(; numsites = length(c) - 1))(
     pack...,
 )
diff --git a/test/balancedacceptance.jl b/test/balancedacceptance.jl
index c3407c3..e2590c4 100644
--- a/test/balancedacceptance.jl
+++ b/test/balancedacceptance.jl
@@ -4,7 +4,9 @@ using BiodiversityObservationNetworks
 using Test
 
 # Must have one point or more
-@test_throws ArgumentError BalancedAcceptance(0, 1.0)
+@test_throws TooFewSites BalancedAcceptance(0, 1.0)
+
+@test_throws TooManySites seed(BalancedAcceptance(; numsites = 26), rand(5, 5))
 
 # Must have a positive alpha
 @test_throws ArgumentError BalancedAcceptance(1, -0.01)
@@ -18,19 +20,19 @@
 # Test with a random uncertainty matrix
 U = rand(20, 20)
-@test length(first(seed(BalancedAcceptance(; numpoints = 10), U))) == 10
-@test length(first(seed(BalancedAcceptance(; numpoints = 20), U))) == 20
-@test eltype(first(seed(BalancedAcceptance(; numpoints = 10), U))) == CartesianIndex
+@test length(first(seed(BalancedAcceptance(; numsites = 10), U))) == 10
+@test length(first(seed(BalancedAcceptance(; numsites = 20), U))) == 20
+@test eltype(first(seed(BalancedAcceptance(; numsites = 10), U))) == CartesianIndex
 
 # Test with an existing coordinates vector
 c = Vector{CartesianIndex}(undef, 20)
-@test_throws DimensionMismatch seed!(c, BalancedAcceptance(; numpoints = length(c) - 1), U)
+@test_throws DimensionMismatch seed!(c, BalancedAcceptance(; numsites = length(c) - 1), U)
 
 # Test the curried version
-@test length(first(seed(BalancedAcceptance(; numpoints = 12))(U))) == 12
+@test length(first(seed(BalancedAcceptance(; numsites = 12))(U))) == 12
 
 # Test the curried allocating version
-@test length(first(seed!(c, BalancedAcceptance(; numpoints = length(c)))(U))) == length(c)
-@test_throws DimensionMismatch seed!(c, BalancedAcceptance(; numpoints = length(c) - 1))(U)
+@test length(first(seed!(c, BalancedAcceptance(; numsites = length(c)))(U))) == length(c)
+@test_throws DimensionMismatch seed!(c, BalancedAcceptance(; numsites = length(c) - 1))(U)
 
-end
\ No newline at end of file
+end
diff --git a/test/cubesampling.jl b/test/cubesampling.jl
index 1781b66..a29df73 100644
--- a/test/cubesampling.jl
+++ b/test/cubesampling.jl
@@ -4,35 +4,35 @@ using BiodiversityObservationNetworks
 using Test
 
 # Must have one point or more
-@test_throws ArgumentError CubeSampling(numpoints = 0)
+@test_throws ArgumentError CubeSampling(numsites = 0)
 
 # Must select fewer points than number of candidate points
-@test_throws ArgumentError CubeSampling(numpoints = 20, πₖ = zeros(10), x = rand(0:4, 2, 10))
+@test_throws ArgumentError CubeSampling(numsites = 20, πₖ = zeros(10), x = rand(0:4, 2, 10))
 
 # Must have the same number of inclusion probabilites as auxillary variables
-@test_throws DimensionMismatch CubeSampling(numpoints = 5, πₖ = zeros(15), x = rand(0:4, 2, 10))
+@test_throws DimensionMismatch CubeSampling(numsites = 5, πₖ = zeros(15), x = rand(0:4, 2, 10))
 
 # Correct subtype
-@test typeof(CubeSampling(numpoints = 5, x = rand(0:4, 2, 10))) <: BONRefiner
-@test typeof(CubeSampling(numpoints = 5, x = rand(0:4, 2, 10)))<: BONSampler
+@test typeof(CubeSampling(numsites = 5, x = rand(0:4, 2, 10))) <: BONSampler
+@test typeof(CubeSampling(numsites = 5, x = rand(0:4, 2, 10))) <: BONSampler
 
 # Test with a random uncertainty matrix
 N = 100
 U = rand(20, 20)
-pack = seed(BalancedAcceptance(; numpoints = N), U)
+pack = seed(BalancedAcceptance(; numsites = N), U)
 
 c = Vector{CartesianIndex}(undef, 15)
 x_mat = rand(0:4, 2, N)
-smpl = CubeSampling(numpoints = length(c), x = x_mat)
+smpl = CubeSampling(numsites = length(c), x = x_mat)
 
 # Length and element type
-@test length(first(refine(pack, smpl))) == smpl.numpoints
+@test length(first(refine(pack, smpl))) == smpl.numsites
 @test eltype(first(refine(first(pack), smpl, U))) == CartesianIndex
 
 # Test with an existing coordinates vector
 @test_throws DimensionMismatch refine!(
     c,
     first(pack),
-    CubeSampling(; numpoints = length(c) - 1, x = x_mat),
+    CubeSampling(; numsites = length(c) - 1, x = x_mat),
     last(pack),
 )
diff --git a/test/runtests.jl b/test/runtests.jl
index 7c50ef0..b9e874d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,33 +1,3 @@
-using Test
-using BiodiversityObservationNetworks
+using TestItemRunner
 
-tests = [
-    "\033[1m\033[34mSEEDER\033[0m Balance Acceptance Sampling" => "balancedacceptance.jl",
-    "\033[1m\033[35mREFINER\033[0m Adaptive Spatial Sampling" => "adaptivespatial.jl",
-    "\033[1m\033[36mHELPER\033[0m Cube Sampling " => "cubesampling.jl",
-    "\033[1m\033[35mREFINER\033[0m Uniqueness" => "uniqueness.jl",
-    "\033[1m\033[36mHELPER\033[0m Entropize" => "entropize.jl",
-    "\033[1m\033[36mHELPER\033[0m Squish" => "squish.jl",
-    "\033[1m\033[36mHELPER\033[0m Stack" => "stack.jl",
-    "\033[1m\033[36mHELPER\033[0m Optimize" => "optimize.jl",
-]
-
-global anyerrors = false
-for test in tests
-    try
-        include(test.second)
-        println("\033[1m\033[32m✓\033[0m\t$(test.first)")
-    catch e
-        global anyerrors = true
-        println("\033[1m\033[31m×\033[0m\t$(test.first)")
-        println("\033[1m\033[38m→\033[0m\ttest/$(test.second)")
-        showerror(stdout, e, backtrace())
-        println()
-        break
-    end
-end
-
-if anyerrors
-    throw("Tests failed")
-end
+@run_package_tests
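With the old hand-rolled runner gone, test discovery is delegated to TestItemRunner, which collects the inline `@testitem` blocks added throughout `src/`. Assuming TestItemRunner's documented filtering API, a subset of items can also be run by name:

```julia
using TestItemRunner

# Run everything (this is all test/runtests.jl does now):
@run_package_tests

# Or, for example, only the SimpleRandom test items:
@run_package_tests filter = ti -> occursin("SimpleRandom", ti.name)
```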
diff --git a/test/squish.jl b/test/squish.jl
index 8eafaa2..8b13789 100644
--- a/test/squish.jl
+++ b/test/squish.jl
@@ -1,51 +1 @@
-# Need to write tests for:
-#   - squish
-#   - optimize
-#   - stack
-#   - integrations
-module BONTestSquish
-    using BiodiversityObservationNetworks
-    using Distributions
-    using Test
-
-    function makelayers(nl)
-        dims = 50,50
-        layers = zeros(dims...,nl)
-        for l in 1:nl
-            layers[:,:,l] .= rand(dims)
-        end
-        layers
-    end
-
-    layers = makelayers(5)
-    ntargs = 3
-    α = rand(Dirichlet([1 for t in 1:ntargs]))
-    W = rand(5, ntargs)
-    for i in 1:size(W,2)
-        W[:,i] .= W[:,i] ./ sum(W[:,i])
-    end
-    @test typeof(squish(layers, W, α)) <: Matrix
-
-    layers = makelayers(5)
-    ntargs = 3
-    α = rand(Dirichlet([1 for t in 1:ntargs]))
-    W = rand(5, ntargs)
-    @test_throws ArgumentError squish(layers, W, α)
-
-    W = rand(4, ntargs)
-    @test_throws ArgumentError squish(layers, W, α)
-
-    α = [1., 2., 3.]
-    @test_throws ArgumentError squish(layers, W, α)
-
-    α = rand(Dirichlet([1 for t in 1:ntargs+1]))
-    @test_throws ArgumentError squish(layers, W, α)
-
-    layers = makelayers(5)
-    W = rand(4, ntargs)
-    α = rand(Dirichlet([1 for t in 1:ntargs]))
-    @test_throws ArgumentError squish(layers, W, α)
-
-end
\ No newline at end of file
diff --git a/test/stack.jl b/test/stack.jl
index 3d307bc..b3cae6e 100644
--- a/test/stack.jl
+++ b/test/stack.jl
@@ -1,21 +1,21 @@
 module BONTestStack
-    using BiodiversityObservationNetworks
-    using SpeciesDistributionToolkit
-    using Test
+using BiodiversityObservationNetworks
+# using SpeciesDistributionToolkit
+using Test
 
-    #=nl = 10
-    layers = [rand(50,50) for i in 1:nl]
+#=nl = 10
+layers = [rand(50,50) for i in 1:nl]
 
-    @test typeof(BiodiversityObservationNetworks.stack(layers)) <: Array{T,3} where T
-    @test size(BiodiversityObservationNetworks.stack(layers),3) == nl
+@test typeof(BiodiversityObservationNetworks.stack(layers)) <: Array{T,3} where T
+@test size(BiodiversityObservationNetworks.stack(layers),3) == nl
 
-    bbox = (left=-83.0, bottom=46.4, right=-55.2, top=63.7)
-    temp, precip, elevation =
-        SimpleSDMPredictor(rand(50,50); bbox...),
-        SimpleSDMPredictor(rand(50,50); bbox...),
-        SimpleSDMPredictor(rand(50,50); bbox...)
-
-    layers = [temp,precip,elevation]
-    @test typeof(BiodiversityObservationNetworks.stack(layers)) <: Array{T,3} where T
-    =#
-end
+bbox = (left=-83.0, bottom=46.4, right=-55.2, top=63.7)
+temp, precip, elevation =
+    SimpleSDMPredictor(rand(50,50); bbox...),
+    SimpleSDMPredictor(rand(50,50); bbox...),
+    SimpleSDMPredictor(rand(50,50); bbox...)
+
+layers = [temp,precip,elevation]
+@test typeof(BiodiversityObservationNetworks.stack(layers)) <: Array{T,3} where T
+=#
+end
diff --git a/test/uniqueness.jl b/test/uniqueness.jl
index 3cb425d..4f4a0a3 100644
--- a/test/uniqueness.jl
+++ b/test/uniqueness.jl
@@ -4,40 +4,40 @@ using BiodiversityObservationNetworks
 using Test
 
 # Must have one point or more
-@test_throws ArgumentError Uniqueness(0, zeros(5,5,5))
-@test_throws ArgumentError Uniqueness(0, zeros(5,5))
+@test_throws TooFewSites Uniqueness(0, zeros(5, 5, 5))
+@test_throws TooFewSites Uniqueness(0, zeros(5, 5))
 
 # Parametric constructor
-@test typeof(Uniqueness(Int32(2), zeros(Float32, 5,5,5))) == Uniqueness{Int32,Float32}
+@test typeof(Uniqueness(Int32(2), zeros(Float32, 5, 5, 5))) == Uniqueness{Int32,Float32}
 
 # Correct subtype
-@test typeof(Uniqueness(2, zeros(5,5,5))) <: BONRefiner
-@test typeof(Uniqueness(2, zeros(5,5,5))) <: BONSampler
+@test typeof(Uniqueness(2, zeros(5, 5, 5))) <: BONSampler
+@test typeof(Uniqueness(2, zeros(5, 5, 5))) <: BONSampler
 
 # Test with a random uncertainty matrix
 layers = rand(20, 20, 5)
 np = 25
-pack = seed(BalancedAcceptance(; numpoints = np), rand(20,20))
+pack = seed(BalancedAcceptance(; numsites=np), rand(20, 20))
 
-@test length(first(refine(pack, Uniqueness(; numpoints = 10, layers=layers)))) == 10
-@test length(first(refine(pack, Uniqueness(; numpoints = 20, layers=layers)))) == 20
-@test eltype(first(refine(pack, Uniqueness(; numpoints = 10, layers=layers)))) == CartesianIndex
+@test length(first(refine(pack, Uniqueness(; numsites=10, layers=layers)))) == 10
+@test length(first(refine(pack, Uniqueness(; numsites=20, layers=layers)))) == 20
+@test eltype(first(refine(pack, Uniqueness(; numsites=10, layers=layers)))) == CartesianIndex
 
 # Test with an existing coordinates vector
 c = Vector{CartesianIndex}(undef, np)
 @test_throws DimensionMismatch refine!(
     c,
     first(pack),
-    Uniqueness(; numpoints = length(c) - 1, layers=layers),
+    Uniqueness(; numsites=length(c) - 1, layers=layers),
     last(pack),
 )
 
# Test the curried version
-@test length(first(refine(Uniqueness(; numpoints = 12, layers=layers))(pack...))) == 12
+@test length(first(refine(Uniqueness(; numsites=12, layers=layers))(pack...))) == 12
 
 # Test the curried allocating version
-@test length(first(refine!(c, Uniqueness(; numpoints = length(c), layers=layers))(pack...))) ==
+@test length(first(refine!(c, Uniqueness(; numsites=length(c), layers=layers))(pack...))) ==
       length(c)
-@test_throws DimensionMismatch refine!(c, Uniqueness(; numpoints = length(c) - 1, layers=layers))(
+@test_throws DimensionMismatch refine!(c, Uniqueness(; numsites=length(c) - 1, layers=layers))(
     pack...,
 )
 
-end
\ No newline at end of file
+end
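Finally, a closing sketch of the `Sites` semantics the rewritten samplers and tests lean on, assuming the `getindex`/`setindex!` forwarding defined in `src/types.jl`:

```julia
using BiodiversityObservationNetworks
const BON = BiodiversityObservationNetworks

s = BON.pool((4, 4))          # Sites wrapping all 16 CartesianIndex cells
s[1:5]                        # UnitRange getindex forwards to the vector
s[1] = CartesianIndex(2, 2)   # setindex! forwards as well
length(s)                     # == 16
```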