Skip to content

Commit

Permalink
Add prevalence_filter (#61)
Browse files Browse the repository at this point in the history
* add prevalence_filter

* add news and docs
  • Loading branch information
kescobo authored Jul 16, 2021
1 parent dc9b7fc commit 8511606
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 5 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Unreleased

- Add `commjoin` function to merge the contents of multiple `CommunityProfile`s ([#58](https://github.com/BioJulia/Microbiome.jl/pull/58))
- Add `prevalence_filter` function to select features that are present at some minumum prevalence

## v0.7.0 Major overhaul

Expand Down
1 change: 1 addition & 0 deletions docs/src/profiles.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ relativeabundance
relativeabundance!
present
prevalence
prevalence_filter
```
3 changes: 2 additions & 1 deletion src/Microbiome.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ export CommunityProfile,
export present,
prevalence,
relativeabundance!,
relativeabundance
relativeabundance,
prevalence_filter
# filterabund

# Diversity
Expand Down
4 changes: 2 additions & 2 deletions src/comm_joins.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ Join multiple `CommunityProfile`s, creating a new `CommunityProfile`.
For now, sample names cannot overlap in any of the input profiles.
```jldoctest
julia> mss = [MicrobiomeSample("sample\$i") for i in 1:15];
julia> mss = [MicrobiomeSample(string("sample",i)) for i in 1:15];
julia> txs = [Taxon("taxon\$i") for i in 1:20];
julia> txs = [Taxon(string("taxon",i)) for i in 1:20];
julia> cm1 = CommunityProfile(spzeros(10,5), txs[1:10], mss[1:5])
CommunityProfile{Float64, Taxon, MicrobiomeSample} with 10 things in 5 places
Expand Down
54 changes: 52 additions & 2 deletions src/profiles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ end
Like [`relativeabundance!`](@ref), but does not mutate original.
"""
function relativeabundance(at::T, kind::Symbol=:fraction) where T <: AbstractAbundanceTable
comm = T(float.(abundances(at)), deepcopy(features(at)), deepcopy(samples(at)))
function relativeabundance(at::AbstractAbundanceTable, kind::Symbol=:fraction)
comm = typeof(at)(float.(abundances(at)), deepcopy(features(at)), deepcopy(samples(at)))
relativeabundance!(comm)
end

Expand Down Expand Up @@ -273,3 +273,53 @@ prevalence(a, minabundance::Real=0.0) = mean(x-> present(x, minabundance), (y fo
function prevalence(at::AbstractAbundanceTable, minabundance::Real=0.0)
mean(x-> present(x, minabundance), abundances(at), dims=2)
end

"""
prevalence_filter(comm::AbstractAbundanceTable; minabundance=0.0; minprevalence=0.05, renorm=false)
Return a filtered `CommunityProfile` where features with prevalence lower than `minprevalence` are removed.
By default, a feature is considered "present" if > 0, but this can be changed by setting `minabundance`.
Optionally, set `renorm = true` to calculate relative abundances after low prevalence features are removed.
```jldoctest
julia> comm = CommunityProfile(sparse([3 0 1 # 0.33, assuming minabundance 2
2 2 2 # 1.0
0 0 1 # 0.0
2 0 0 # 0.33
]),
[Taxon(string(i)) for i in 1:4],
[MicrobiomeSample(string(i)) for i in 1:3]);
julia> prevalence_filter(comm, minabundance=2, minprevalence=0.3)
CommunityProfile{Int64, Taxon, MicrobiomeSample} with 3 things in 3 places
Thing names:
1, 2, 4
Place names:
1, 2, 3
julia> prevalence_filter(comm, minabundance=2, minprevalence=0.4)
CommunityProfile{Int64, Taxon, MicrobiomeSample} with 1 things in 3 places
Thing names:
2
Place names:
1, 2, 3
julia> prevalence_filter(comm, minabundance=3, minprevalence=0.3)
CommunityProfile{Int64, Taxon, MicrobiomeSample} with 1 things in 3 places
Thing names:
1
Place names:
1, 2, 3
```
"""
function prevalence_filter(comm::AbstractAbundanceTable; minabundance=0.0, minprevalence=0.05, renorm=false)
comm = comm[vec(prevalence(comm, minabundance) .>= minprevalence), :]
return renorm ? relativeabundance(comm) : comm
end
13 changes: 13 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,19 @@ end
@test samples(c3) == samples(comm)
@test features(c3) == features(comm)
end

filtertest = CommunityProfile(sparse(Float64[3 2 1 # 0.66, assuming minabundance 2
2 2 2 # 1.0
0 0 1 # 0.0
2 0 0 # 0.33
]),
[Taxon(string(i)) for i in 1:4],
[MicrobiomeSample(string(i)) for i in 1:3]);
@test size(prevalence_filter(filtertest)) == (4,3)
@test size(prevalence_filter(filtertest, minabundance=2)) == (3,3)
@test size(prevalence_filter(filtertest, minabundance=2, minprevalence=0.4)) == (2,3)
@test all(<=(1.0), abundances(prevalence_filter(filtertest, renorm=true)))
@test all(x-> isapprox(x, 1.0, atol=1e-8), sum(abundances(prevalence_filter(filtertest, renorm=true)), dims=1))
end

@testset "Indexing and Tables integration" begin
Expand Down

0 comments on commit 8511606

Please sign in to comment.