From ce1b94f7ef183fceea09ed588ebe7906989b2838 Mon Sep 17 00:00:00 2001 From: Kevin Bonham Date: Mon, 9 Aug 2021 17:22:38 -0400 Subject: [PATCH] Metadata for CommunityProfiles (#66) * slow but functional metadata export * add tests * make return value a row table --- src/Microbiome.jl | 3 ++- src/profiles.jl | 21 +++++++++++++++++++++ src/samples_features.jl | 1 + test/runtests.jl | 13 +++++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/Microbiome.jl b/src/Microbiome.jl index 05fda65..7b53ae3 100644 --- a/src/Microbiome.jl +++ b/src/Microbiome.jl @@ -37,7 +37,8 @@ export CommunityProfile, profiletype, featuretotals, sampletotals, - commjoin + commjoin, + metadata # Abundances export present, diff --git a/src/profiles.jl b/src/profiles.jl index 69e0c3d..31a1152 100644 --- a/src/profiles.jl +++ b/src/profiles.jl @@ -323,3 +323,24 @@ function prevalence_filter(comm::AbstractAbundanceTable; minabundance=0.0, minpr comm = comm[vec(prevalence(comm, minabundance) .>= minprevalence), :] return renorm ? relativeabundance(comm) : comm end + + +## Metadata + +""" + metadata(cp::CommunityProfile) + +Return a row table with one `NamedTuple` per sample, whose keys are `:sample` +and every metadata key found on any sample in `cp`. +Samples lacking a given metadata key are filled with `missing`. + +The returned value can be passed to any Tables.jl-compatible sink, +e.g. `DataFrame`. 
+""" +function metadata(cp::CommunityProfile) + ss = samples(cp) + cols = unique(reduce(vcat, collect.(keys.(metadata.(ss))))) + return Tables.rowtable(merge((; sample=name(s)), + NamedTuple(c => get(s, c, missing) for c in cols) + ) for s in ss) +end \ No newline at end of file diff --git a/src/samples_features.jl b/src/samples_features.jl index ec466f1..174d4f4 100644 --- a/src/samples_features.jl +++ b/src/samples_features.jl @@ -75,6 +75,7 @@ end Base.keys(as::AbstractSample) = keys(metadata(as)) Base.haskey(as::AbstractSample, key::Symbol) = in(key, keys(as)) +Base.get(as::AbstractSample, key::Symbol, default) = get(metadata(as), key, default) """ MicrobiomeSample(name::String, metadata::Dictionary{Symbol, T}) <: AbstractSample diff --git a/test/runtests.jl b/test/runtests.jl index 27a70c7..d7f8d1e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -145,6 +145,19 @@ end @test size(prevalence_filter(filtertest, minabundance=2, minprevalence=0.4)) == (2,3) @test all(<=(1.0), abundances(prevalence_filter(filtertest, renorm=true))) @test all(x-> isapprox(x, 1.0, atol=1e-8), sum(abundances(prevalence_filter(filtertest, renorm=true)), dims=1)) + + s1 = MicrobiomeSample("sample1", Dictionary(Dict(:age=> 37, :name=>"kevin", :something=>1.0))) + s2 = MicrobiomeSample("sample2", Dictionary(Dict(:age=> 37, :name=>"kevin", :something_else=>2.0, :extra=>true))) + + md1, md2 = metadata(CommunityProfile(sparse([1 1; 2 2; 3 3]), [Taxon(string(i)) for i in 1:3], [s1, s2])) + + @test all(row-> row[:age] == 37, [md1, md2]) + @test all(row-> row[:name] == "kevin", [md1, md2]) + @test md1[:something] == 1.0 + @test ismissing(md2[:something]) + @test md2[:something_else] == 2.0 + @test ismissing(md1[:something_else]) + end @testset "Indexing and Tables integration" begin