From fbce94f8f7285d9afdbc3c78055ef84ac93812ac Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 11 Sep 2024 14:06:40 +0200 Subject: [PATCH 1/8] add `vcat` --- src/Onda.jl | 2 +- src/samples.jl | 29 ++++++++++++++++++++++++++ test/samples.jl | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/src/Onda.jl b/src/Onda.jl index 81c3c74c..6b4f6486 100644 --- a/src/Onda.jl +++ b/src/Onda.jl @@ -1,6 +1,6 @@ module Onda -using Compat: @compat +using Compat: @compat, allequal using UUIDs, Dates, Random, Mmap using Compat, Legolas, TimeSpans, Arrow, Tables, TranscodingStreams, CodecZstd using Legolas: @schema, @version, write_full_path diff --git a/src/samples.jl b/src/samples.jl index f24e8261..7b40a57c 100644 --- a/src/samples.jl +++ b/src/samples.jl @@ -186,6 +186,35 @@ function _column_arguments(samples::Samples, x) return _indices_fallback(_column_arguments, samples, x) end +##### +##### operations +##### + +# Ensure we don't match `vcat()` since that would be piracy +function Base.vcat(first_samples::Samples, more_samples::Samples...) + samples = (first_samples, more_samples...) + for field in setdiff(fieldnames(SamplesInfoV2), [:channels]) + if !allequal(getfield(s.info, field) for s in samples) + throw(ArgumentError("Cannot `vcat` samples objects which do not all have the same `$field`. Got values: $([getfield(s.info, field) for s in samples])")) + end + end + if !allequal(duration(s) for s in samples) + throw(ArgumentError("Cannot `vcat` samples objects which do not all have the same duration. Got values: $([duration(s) for s in samples])")) + end + if !allequal((s.encoded for s in samples)) + throw(ArgumentError("Cannot `vcat` samples objects which are not all encoded or all decoded. Got encoding values: $([s.encoded for s in samples])")) + end + all_channels = collect(Iterators.flatten(s.info.channels for s in samples)) + if !allunique(all_channels) + throw(ArgumentError("Cannot `vcat` samples objects which do not have unique channel names. Got channel names: $(all_channels)")) + end + # We checked all fields match except `channels`, so we can start with the first one and update the channels + # (we also know `samples` is non-empty by the signature) + info = Legolas.record_merge(first(samples).info; channels=all_channels) + data = vcat((s.data for s in samples)...) + return Samples(data, info, first(samples).encoded) +end + ##### ##### encoding utilities ##### diff --git a/test/samples.jl b/test/samples.jl index 935bf078..8ebb7dc2 100644 --- a/test/samples.jl +++ b/test/samples.jl @@ -247,6 +247,60 @@ end @test hash(samples) == hash(samples2) end +@testset "Base.vcat" begin + info = SamplesInfoV2(sensor_type="eeg", + channels=["a", "b", "c"], + sample_unit="unit", + sample_resolution_in_unit=1.0, + sample_offset_in_unit=0.0, + sample_type=Float32, + sample_rate=100.0) + + for encoded in (true, false) + samples1 = Samples(rand(sample_type(info), 3, 100), info, encoded) + + info2 = Legolas.record_merge(info; channels = ["d", "e", "f"]) + samples2 = Samples(rand(sample_type(info2), 3, 100), info2, encoded) + + samples12 = vcat(samples1, samples2) + @test samples12.data[1:3, :] == samples1.data + @test samples12.data[4:6, :] == samples2.data + @test samples12.info.channels == map(string, 'a':'f') + end + + err = ArgumentError("""Cannot `vcat` samples objects which do not have unique channel names. Got channel names: ["a", "b", "c", "a", "b", "c"]""") + @test_throws err vcat(samples1, samples1) + + samples2 = Samples(rand(sample_type(info), 3, 100), Legolas.record_merge(info2; sample_rate = 10), true) + err = ArgumentError("Cannot `vcat` samples objects which do not all have the same `sample_rate`. Got values: [100.0, 10.0]") + @test_throws err vcat(samples1, samples2) + + samples2 = Samples(rand(Float64, 3, 100), Legolas.record_merge(info2; sample_type = Float64), true) + err = ArgumentError("""Cannot `vcat` samples objects which do not all have the same `sample_type`. Got values: ["float32", "float64"]""") + @test_throws err vcat(samples1, samples2) + + samples2 = Samples(rand(sample_type(info), 3, 100), Legolas.record_merge(info2; sensor_type = "eeg2"), true) + err = ArgumentError("""Cannot `vcat` samples objects which do not all have the same `sensor_type`. Got values: ["eeg", "eeg2"]""") + @test_throws err vcat(samples1, samples2) + + samples2 = Samples(rand(sample_type(info), 3, 100), Legolas.record_merge(info2; sample_unit = "unit2"), true) + err = ArgumentError("""Cannot `vcat` samples objects which do not all have the same `sample_unit`. Got values: ["unit", "unit2"]""") + @test_throws err vcat(samples1, samples2) + + samples2 = Samples(rand(sample_type(info), 3, 100), Legolas.record_merge(info2; sample_resolution_in_unit = 5), true) + err = ArgumentError("""Cannot `vcat` samples objects which do not all have the same `sample_resolution_in_unit`. Got values: [1.0, 5.0]""") + @test_throws err vcat(samples1, samples2) + + samples2 = Samples(rand(sample_type(info), 3, 100), Legolas.record_merge(info2; sample_offset_in_unit = 5), true) + err = ArgumentError("""Cannot `vcat` samples objects which do not all have the same `sample_offset_in_unit`. Got values: [0.0, 5.0]""") + @test_throws err vcat(samples1, samples2) + + samples2 = Samples(rand(sample_type(info), 3, 100), info2, false) + err = ArgumentError("""Cannot `vcat` samples objects which are not all encoded or all decoded. Got encoding values: Bool[1, 0]""") + @test_throws err vcat(samples1, samples2) +end + + @testset "Samples views" begin info = SamplesInfoV2(sensor_type="eeg", From eef5d38e771c58574b0bdfb513ae6f1059145979 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:06:50 +0200 Subject: [PATCH 2/8] bump Legolas compat requirement --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 91438702..f79a8ce1 100644 --- a/Project.toml +++ b/Project.toml @@ -29,7 +29,7 @@ CodecZstd = "0.6, 0.7, 0.8" Compat = "3.32, 4" DataFrames = "1.2" FLAC_jll = "1.3.3" -Legolas = "0.5" +Legolas = "0.5.18" Minio = "0.2" Tables = "1.4" TimeSpans = "1.1" From 4754c2cf73753f8e24bb4dd55dee15b588c4c849 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:08:33 +0200 Subject: [PATCH 3/8] Revert "bump Legolas compat requirement" This reverts commit eef5d38e771c58574b0bdfb513ae6f1059145979. --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f79a8ce1..91438702 100644 --- a/Project.toml +++ b/Project.toml @@ -29,7 +29,7 @@ CodecZstd = "0.6, 0.7, 0.8" Compat = "3.32, 4" DataFrames = "1.2" FLAC_jll = "1.3.3" -Legolas = "0.5.18" +Legolas = "0.5" Minio = "0.2" Tables = "1.4" TimeSpans = "1.1" From ce58509ce8e4e31306878fae21f097450c58a4e1 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:11:34 +0200 Subject: [PATCH 4/8] backport `record_merge` instead --- src/deprecations.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/deprecations.jl b/src/deprecations.jl index b71111fe..9c691fc8 100644 --- a/src/deprecations.jl +++ b/src/deprecations.jl @@ -94,3 +94,15 @@ function upgrade(from::SignalV1, ::SignalV2SchemaVersion) from.channels, from.sample_unit, from.sample_resolution_in_unit, from.sample_offset_in_unit, from.sample_type, from.sample_rate) end + +# Not quite a deprecation, but we will backport `record_merge` for our own purposes +if pkgversion(Legolas) < v"0.5.18" + function record_merge(record::Legolas.AbstractRecord; fields_to_merge...) + # Avoid using `typeof(record)` as can cause constructor failures with parameterized + # record types. + R = Legolas.record_type(Legolas.schema_version_from_record(record)) + return R(Tables.rowmerge(record; fields_to_merge...)) + end +else + using Legolas: record_merge +end From b97e9f73193c37b9f533b7f47fba4895a5fae3de Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:12:38 +0200 Subject: [PATCH 5/8] fix test --- test/samples.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/samples.jl b/test/samples.jl index 8ebb7dc2..f64897f5 100644 --- a/test/samples.jl +++ b/test/samples.jl @@ -268,6 +268,9 @@ end @test samples12.info.channels == map(string, 'a':'f') end + samples1 = Samples(rand(sample_type(info), 3, 100), info, true) + info2 = Legolas.record_merge(info; channels = ["d", "e", "f"]) + err = ArgumentError("""Cannot `vcat` samples objects which do not have unique channel names. Got channel names: ["a", "b", "c", "a", "b", "c"]""") @test_throws err vcat(samples1, samples1) From 59e35f166f033cddfb19ecf022fe2a3616d8524c Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:38:00 +0200 Subject: [PATCH 6/8] switch to `Onda.record_merge` --- test/samples.jl | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/test/samples.jl b/test/samples.jl index f64897f5..9b901ea4 100644 --- a/test/samples.jl +++ b/test/samples.jl @@ -259,7 +259,9 @@ end for encoded in (true, false) samples1 = Samples(rand(sample_type(info), 3, 100), info, encoded) - info2 = Legolas.record_merge(info; channels = ["d", "e", "f"]) + # Note: `record_merge` is defined in Legolas, but for the purposes of backwards compatibility + # with old Arrow versions + info2 = Onda.record_merge(info; channels = ["d", "e", "f"]) samples2 = Samples(rand(sample_type(info2), 3, 100), info2, encoded) samples12 = vcat(samples1, samples2) @@ -269,32 +271,32 @@ end end samples1 = Samples(rand(sample_type(info), 3, 100), info, true) - info2 = Legolas.record_merge(info; channels = ["d", "e", "f"]) + info2 = Onda.record_merge(info; channels = ["d", "e", "f"]) err = ArgumentError("""Cannot `vcat` samples objects which do not have unique channel names. Got channel names: ["a", "b", "c", "a", "b", "c"]""") @test_throws err vcat(samples1, samples1) - samples2 = Samples(rand(sample_type(info), 3, 100), Legolas.record_merge(info2; sample_rate = 10), true) + samples2 = Samples(rand(sample_type(info), 3, 100), Onda.record_merge(info2; sample_rate = 10), true) err = ArgumentError("Cannot `vcat` samples objects which do not all have the same `sample_rate`. Got values: [100.0, 10.0]") @test_throws err vcat(samples1, samples2) - samples2 = Samples(rand(Float64, 3, 100), Legolas.record_merge(info2; sample_type = Float64), true) + samples2 = Samples(rand(Float64, 3, 100), Onda.record_merge(info2; sample_type = Float64), true) err = ArgumentError("""Cannot `vcat` samples objects which do not all have the same `sample_type`. Got values: ["float32", "float64"]""") @test_throws err vcat(samples1, samples2) - samples2 = Samples(rand(sample_type(info), 3, 100), Legolas.record_merge(info2; sensor_type = "eeg2"), true) + samples2 = Samples(rand(sample_type(info), 3, 100), Onda.record_merge(info2; sensor_type = "eeg2"), true) err = ArgumentError("""Cannot `vcat` samples objects which do not all have the same `sensor_type`. Got values: ["eeg", "eeg2"]""") @test_throws err vcat(samples1, samples2) - samples2 = Samples(rand(sample_type(info), 3, 100), Legolas.record_merge(info2; sample_unit = "unit2"), true) + samples2 = Samples(rand(sample_type(info), 3, 100), Onda.record_merge(info2; sample_unit = "unit2"), true) err = ArgumentError("""Cannot `vcat` samples objects which do not all have the same `sample_unit`. Got values: ["unit", "unit2"]""") @test_throws err vcat(samples1, samples2) - samples2 = Samples(rand(sample_type(info), 3, 100), Legolas.record_merge(info2; sample_resolution_in_unit = 5), true) + samples2 = Samples(rand(sample_type(info), 3, 100), Onda.record_merge(info2; sample_resolution_in_unit = 5), true) err = ArgumentError("""Cannot `vcat` samples objects which do not all have the same `sample_resolution_in_unit`. Got values: [1.0, 5.0]""") @test_throws err vcat(samples1, samples2) - samples2 = Samples(rand(sample_type(info), 3, 100), Legolas.record_merge(info2; sample_offset_in_unit = 5), true) + samples2 = Samples(rand(sample_type(info), 3, 100), Onda.record_merge(info2; sample_offset_in_unit = 5), true) err = ArgumentError("""Cannot `vcat` samples objects which do not all have the same `sample_offset_in_unit`. Got values: [0.0, 5.0]""") @test_throws err vcat(samples1, samples2) From 4ceafd7462d5655f9d26f030bea5d1b5a777a779 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:38:03 +0200 Subject: [PATCH 7/8] switch to `Onda.record_merge` --- test/samples.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/samples.jl b/test/samples.jl index 9b901ea4..b432f3ec 100644 --- a/test/samples.jl +++ b/test/samples.jl @@ -259,8 +259,8 @@ end for encoded in (true, false) samples1 = Samples(rand(sample_type(info), 3, 100), info, encoded) - # Note: `record_merge` is defined in Legolas, but for the purposes of backwards compatibility - # with old Arrow versions + # Note: `record_merge` is defined in newer Legolas versions, but for the purposes of backwards compatibility + # with old Arrow versions, we've backported for internal use in Onda. info2 = Onda.record_merge(info; channels = ["d", "e", "f"]) samples2 = Samples(rand(sample_type(info2), 3, 100), info2, encoded) From f066825237035feee852a9be505c6572f865f653 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 11 Sep 2024 17:18:33 +0200 Subject: [PATCH 8/8] Update src/samples.jl Co-authored-by: Dave Kleinschmidt --- src/samples.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/samples.jl b/src/samples.jl index 7b40a57c..00b4e2c9 100644 --- a/src/samples.jl +++ b/src/samples.jl @@ -210,7 +210,7 @@ function Base.vcat(first_samples::Samples, more_samples::Samples...) end # We checked all fields match except `channels`, so we can start with the first one and update the channels # (we also know `samples` is non-empty by the signature) - info = Legolas.record_merge(first(samples).info; channels=all_channels) + info = record_merge(first(samples).info; channels=all_channels) data = vcat((s.data for s in samples)...) return Samples(data, info, first(samples).encoded) end