diff --git a/Project.toml b/Project.toml index 8708b78..4b65489 100644 --- a/Project.toml +++ b/Project.toml @@ -1,10 +1,12 @@ name = "FCSFiles" uuid = "d76558cf-badf-52d4-a17e-381ab0b0d937" -version = "0.1.5" +version = "0.2.0" [deps] +AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9" FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" [compat] +AxisArrays = "0.4" FileIO = "1" julia = "1" diff --git a/README.md b/README.md index 16cce88..e0800a9 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ Add FileIO.jl integration for FCS files |--------------------------------------------------|--------------| | ![](https://juliahub.com/docs/FCSFiles/version.svg) | [![][ci-img]][ci-url] [![][codecov-img]][codecov-url] | -## Usage +## Loading an FCSFile +FCS files can be loaded by using the FileIO interface. ```julia julia> using FileIO @@ -27,14 +28,146 @@ FCS.FlowSample{Float32} SSC-W B_530-30-A Time +``` + +## Metadata +Once loaded the parameters of the FCS file are available as properties. + +``` +julia> flowrun.last_modified +"2019-Oct-03 15:35:15" + +julia> flowrun.p1n +"FSC-A" +``` + +## Indexing +There are many ways to index into the FCS file. You can index the FCS file as a matrix (actually an `AxisArray`). + +``` +julia> flowrun[:, 1] +1-dimensional AxisArray{Float32,1,...} with axes: + :param, ["FSC-A", "FSC-H", "SSC-A", "SSC-H", "B1-A", "B1-H", "B2-A", "B2-H", "HDR-CE", "HDR-SE" … "V2-A", "V2-H", "Y1-A", "Y1-H", "Y2-A", "Y2-H", "Y3-A", "Y3-H", "Y4-A", "Y4-H"] +And data, a 23-element Vector{Float32}: + 19.319384 + 12.838199 + 44.391308 + 20.214031 + 0.01834727 + 0.72980446 + -0.25282443 + 0.4430968 + ⋮ + 0.54869235 + -0.027989198 + 0.48970717 + 4.498265 + 5.900927 + 0.02512901 + 0.3956769 +``` + +This retrieves the values of all the parameters for the first event in the FCS file. + +Similarly you can get the values of a single parameter for all events. 
+ +``` +julia> flowrun[1, :] +1-dimensional AxisArray{Float32,1,...} with axes: + :event, 1:83562 +And data, a 83562-element Vector{Float32}: + 19.319384 + 22.961153 + 36.157864 + 30.91769 + 5.644829 + 14.188097 + 34.42944 + 4.4080987 + ⋮ + 23.391977 + -4.813841 + -1.2413055 + 11.075016 + 13.712906 + 23.54529 + 5.740017 +``` + +You can also take ranges of events. +``` +julia> flowrun[1, end-99:end] +1-dimensional AxisArray{Float32,1,...} with axes: + :event, 83463:83562 +And data, a 100-element Vector{Float32}: + 4.576562 + 2.553804 + 10.608879 + -6.4025674 + -18.626959 + 6.1649327 + 24.049818 + 21.735662 + ⋮ + 23.391977 + -4.813841 + -1.2413055 + 11.075016 + 13.712906 + 23.54529 + 5.740017 +``` + +If you know the name of a parameter, you can use that name to index. + +``` +julia> flowrun["FSC-A"] +1-dimensional AxisArray{Float32,1,...} with axes: + :event, 1:83562 +And data, a 83562-element Vector{Float32}: + 19.319384 + 22.961153 + 36.157864 + 30.91769 + 5.644829 + 14.188097 + 34.42944 + 4.4080987 + ⋮ + 23.391977 + -4.813841 + -1.2413055 + 11.075016 + 13.712906 + 23.54529 + 5.740017 +``` + +Or you can get multiple parameters at the same time. + +``` +julia> flowrun[["FSC-A", "FSC-H"]] +2-dimensional AxisArray{Float32,2,...} with axes: + :param, ["FSC-A", "FSC-H"] + :event, 1:83562 +And data, a 2×83562 Matrix{Float32}: + 19.3194 22.9612 36.1579 30.9177 … 11.075 13.7129 23.5453 5.74002 + 12.8382 3.40729 17.4995 14.0875 8.80171 5.29686 13.0893 11.3576 +``` + +In general, any indexing that works with `AxisArray`s should work the same with FCS files. + +## Plotting +Here is an example which constructs a 2D histogram visualisation of an FCS file. 
+ +``` julia> using Gadfly julia> p = plot(x=flowrun["FSC-A"], y=flowrun["SSC-A"], Geom.histogram2d, Guide.xlabel("FSC-A"), Guide.ylabel("SSC-A"), Coord.cartesian(xmin=0, ymin=0)) julia> draw(PNG("example.png", 10cm, 7cm, dpi=300), p) - ``` ![](example.png) diff --git a/src/FCSFiles.jl b/src/FCSFiles.jl index 0735370..0cdb472 100644 --- a/src/FCSFiles.jl +++ b/src/FCSFiles.jl @@ -1,6 +1,8 @@ module FCSFiles using FileIO +using AxisArrays +const axes = Base.axes include("type.jl") include("utils.jl") diff --git a/src/parse.jl b/src/parse.jl index 6e22f22..10f15d9 100644 --- a/src/parse.jl +++ b/src/parse.jl @@ -91,11 +91,13 @@ function parse_data(io, # data should be in multiples of `n_params` for list mode (mod(length(flat_data), n_params) != 0) && error("FCS file is corrupt. DATA and TEXT sections don't match.") - data = Dict{String, Vector{dtype}}() + datamatrix = Matrix{dtype}(undef, n_params, length(flat_data) ÷ n_params) + rows = Vector{String}(undef, n_params) for i in 1:n_params - data[text_mappings["\$P$(i)N"]] = flat_data[i:n_params:end] + rows[i] = text_mappings["\$P$(i)N"] + datamatrix[i, :] = flat_data[i:n_params:end] end - + data = AxisArray(datamatrix, Axis{:param}(rows), Axis{:event}(1:size(datamatrix, 2))) FlowSample(data, text_mappings) end diff --git a/src/type.jl b/src/type.jl index 8e81fdc..75b7eae 100644 --- a/src/type.jl +++ b/src/type.jl @@ -1,5 +1,5 @@ -struct FlowSample{T} - data::Dict{String, Vector{T}} +struct FlowSample{T<:Number, I<:AbstractVector{Int}} + data::AxisArray{T, 2, Matrix{T}, Tuple{Axis{:param, Vector{String}}, Axis{:event, I}}} params::Dict{String, String} end @@ -32,11 +32,62 @@ function Base.show(io::IO, f::FlowSample) end end -# Implement most important parts of Dict interface -Base.length(f::FlowSample) = length(f.data) -Base.haskey(f::FlowSample, x) = haskey(f.data, x) -Base.getindex(f::FlowSample, key) = f.data[key] -Base.keys(f::FlowSample) = keys(f.data) -Base.values(f::FlowSample) = values(f.data) 
-Base.iterate(iter::FlowSample) = Base.iterate(iter.data) -Base.iterate(iter::FlowSample, state) = Base.iterate(iter.data, state) +""" +Looks for `s` in the `params` dict. + +`s` is searched for first as an FCS standard keyword and then as a user-defined keyword, with precedence given to the standard keywords. E.g. `param_lookup(flowrun, "par")` will look for both `"\$PAR"` and `"PAR"` but return the value of `"\$PAR"` if it exists, otherwise that of `"PAR"`. + +In accordance with the FCS3.0 standard, the search is case-insensitive. + +If no match is found, `nothing` is returned. +""" +function param_lookup(f::FlowSample, s::AbstractString) + s = uppercase(s) + params = getfield(f, :params) + + result = get(params, startswith(s, "\$") ? s : "\$" * s, nothing) + + return result === nothing ? get(params, s, nothing) : result +end + +function Base.getproperty(f::FlowSample, s::Symbol) + if s == :params + Base.depwarn("`flowrun.params` is deprecated and will be removed in a future release. Parameters can be accessed like any other member variable. E.g. `flowrun.par` or `flowrun.PAR`.", "flowrun.params") + elseif s == :data + Base.depwarn("`flowrun.data` is deprecated and will be removed in a future release. The data can be indexed, e.g. 
`flowrun[\"SSC-A\"]` or can be obtained as a matrix with `Array(flowrun)`.", "flowrun.data") + end + + value = param_lookup(f, String(s)) + + if value === nothing + getfield(f, s) + else + value + end +end + +function Base.propertynames(f::FlowSample, private::Bool=false) + makesym(x) = Symbol.(lowercase(first(match(r"^\$?(.+)", x).captures))) + names = makesym.(keys(getfield(f, :params))) + + if private + append!(names, fieldnames(FlowSample)) + end + names +end + +Base.size(f::FlowSample) = size(getfield(f, :data)) +Base.size(f::FlowSample, dim::Int) = size(f)[dim] +Base.length(f::FlowSample) = size(f, 1) + +Base.keys(f::FlowSample) = getfield(f, :data).axes[1] +Base.haskey(f::FlowSample, x) = x in keys(f) +Base.values(f::FlowSample) = [getfield(f, :data)[key] for key in keys(f)] + +Base.axes(f::FlowSample, args...) = AxisArrays.axes(getfield(f, :data), args...) +Base.getindex(f::FlowSample, args...) = getindex(getfield(f, :data), args...) +Base.iterate(iter::FlowSample) = iterate(getfield(iter, :data)) +Base.iterate(iter::FlowSample, state) = iterate(getfield(iter, :data), state) +Base.Array(f::FlowSample) = Array(getfield(f, :data)) + +AxisArrays.axisnames(f::FlowSample) = axisnames(getfield(f, :data)) diff --git a/test/runtests.jl b/test/runtests.jl index a1cde72..2717738 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,8 +2,9 @@ using FCSFiles using FileIO using Test -project_root = isfile("runtests.jl") ? 
abspath("..") : abspath(".") +project_root = dirname(dirname(@__FILE__)) testdata_dir = joinpath(project_root, "test", "fcsexamples") + if !isdir(testdata_dir) run(`git -C $(joinpath(project_root, "test")) clone https://github.com/tlnagy/fcsexamples.git --branch main --depth 1`) else @@ -14,18 +15,134 @@ else end @testset "FCSFiles test suite" begin - # test the size of the file - @testset "SSC-A size" begin - flowrun = load(joinpath(testdata_dir, "BD-FACS-Aria-II.fcs")) - @test length(flowrun["SSC-A"]) == 100000 - end - # test the loading of a large FCS file @testset "Loading of large FCS file" begin # load the large file flowrun = load(joinpath(testdata_dir, "Day 3.fcs")) - @test length(flowrun.data) == 50 - @test length(flowrun.params) == 268 + @test length(flowrun) == 50 + @test length(getfield(flowrun, :params)) == 268 + end + + @testset "FlowSample size and length" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + @test size(flowrun) == (14, 100000) + @test length(flowrun) == 14 + end + + @testset "FlowSample keys and haskey" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + expected = [ + "G710-A", "FSC-H", "V545-A", "FSC-A", "G560-A", "Time", + "SSC-A", "B515-A", "G610-A", "Event #", "R780-A", + "G780-A", "V450-A", "G660-A", + ] + flowrun = load(fn) + + for channel in expected + @test haskey(flowrun, channel) + end + + @test all(x in keys(flowrun) for x in expected) + end + + # AxisArray already has tests, here we are just checking that + # relevant methods get forwarded to their AxisArray implementation + @testset "Channel access using String" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + + for key in keys(flowrun) + @test flowrun[key] == getfield(flowrun, :data)[key] + end + end + + @testset "Multiple channel access using String" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + channels = keys(flowrun) + for (keyA, keyB) in 
zip(channels[1:end-1], channels[2:end]) + @test flowrun[[keyA, keyB]] == getfield(flowrun, :data)[[keyA, keyB]] + end + end + + @testset "Integer sample indexing as second dimension" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + + idx = rand(1:size(flowrun, 2)) + @test getfield(flowrun, :data)[:, idx] == flowrun[:, idx] + + @test getfield(flowrun, :data)[:, begin] == flowrun[:, begin] + + @test getfield(flowrun, :data)[:, end] == flowrun[:, end] + + rng = range(sort(rand(1:size(flowrun, 2), 2))..., step=1) + @test getfield(flowrun, :data)[:, rng] == flowrun[:, rng] + end + + @testset "Mixed indexing with String and Integer" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + + idx = rand(1:size(flowrun, 2)) + @test getfield(flowrun, :data)["SSC-A", idx] == flowrun["SSC-A", idx] + + @test getfield(flowrun, :data)[["SSC-A", "FSC-A"], idx] == flowrun[["SSC-A", "FSC-A"], idx] + + rng = range(sort(rand(1:size(flowrun, 2), 2))..., step=1) + @test getfield(flowrun, :data)["SSC-A", rng] == flowrun["SSC-A", rng] + + @test getfield(flowrun, :data)[["SSC-A", "FSC-A"], rng] == flowrun[["SSC-A", "FSC-A"], rng] + end + + @testset "Logical indexing in second dimension" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + + idxs = rand(Bool, size(flowrun, 2)) + @test getfield(flowrun, :data)["SSC-A", idxs] == flowrun["SSC-A", idxs] + end + + @testset "Convert to Matrix" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + + @test Array(getfield(flowrun, :data)) == Array(flowrun) + end + + @testset "Regression for reading FCS files" begin + # should catch if changes to the parsing of the file introduce errors + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + + checkpoints = [ + ("SSC-A", 33), + ("G610-A", 703), + ("Event #", 382), + ("FSC-A", 15), + ("Time", 1), + ("V450-A", 9938) + ] + + expected = [585.006f0, 993.2587f0, 3810.0f0, 
131008.0f0, 0.0f0, 472.9652f0] + + for (checkpoint, value) in zip(checkpoints, expected) + @test flowrun[checkpoint[1]][checkpoint[2]] == value + end + end + + @testset "Iterating FlowSample" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + + i = 1 + pass = true + for x in flowrun + pass = pass && x == flowrun[i] + i = i + 1 + end + @test pass end @testset "Loading float-encoded file" begin @@ -39,6 +156,79 @@ end flowrun = load(joinpath(testdata_dir, "Accuri - C6.fcs")) @test length(flowrun["SSC-A"]) == 63273 @test flowrun["SSC-A"][2] == 370971 + end + + @testset "params throws deprecation warning" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + + msg = "`flowrun.params` is deprecated and will be removed in a future release. Parameters can be accessed like any other member variable. E.g. `flowrun.par` or `flowrun.PAR`." + @test_logs (:warn, msg) flowrun.params + end + + @testset "data throws deprecation warning" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + + msg = "`flowrun.data` is deprecated and will be removed in a future release. The data can be indexed, e.g. `flowrun[\"SSC-A\"]` or can be obtained as a matrix with `Array(flowrun)`." 
+ @test_logs (:warn, msg) flowrun.data + end + + @testset "`param_lookup` for different versions of the param" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + pass = true + + for (key, value) in getfield(flowrun, :params) + # exact name + pass = pass && value == FCSFiles.param_lookup(flowrun, key) + # with no $ + var = first(match(r"^\$?(.+)", key).captures) + pass = pass && value == FCSFiles.param_lookup(flowrun, var) + # in lowercase + pass = pass && value == FCSFiles.param_lookup(flowrun, lowercase(key)) + # in lowercase with no $ + pass = pass && value == FCSFiles.param_lookup(flowrun, lowercase(var)) + end + @test pass + end + + @testset "param access through `Base.getproperty`" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + pass = true + + for (key, value) in getfield(flowrun, :params) + # bare usage + pass = pass && value == getproperty(flowrun, Symbol(key)) + # with no $ + var = first(match(r"^\$?(.+)", key).captures) + pass = pass && value == getproperty(flowrun, Symbol(var)) + # in lowercase + pass = pass && value == getproperty(flowrun, Symbol(lowercase(key))) + # in lowercase with no $ + pass = pass && value == getproperty(flowrun, Symbol(lowercase(var))) + end + @test pass + @static if VERSION < v"1.8" + @test_throws ErrorException flowrun.notthere + else + @test_throws "no field notthere" flowrun.notthere + end + end + + @testset "property names give the names of the parameters" begin + fn = joinpath(testdata_dir, "BD-FACS-Aria-II.fcs") + flowrun = load(fn) + pass = true + + for key in keys(getfield(flowrun, :params)) + var = Symbol(lowercase(first(match(r"^\$?(.+)", key).captures))) + pass = pass && var in propertynames(flowrun) + end + @test pass + @test :params in propertynames(flowrun, true) + @test :data in propertynames(flowrun, true) end end