Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Breaking: add combine method for groupby output, fixing similar for AbstractDimStack #903

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/DimensionalData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ export dimnum, hasdim, hasselection, otherdims
export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!,
mergedims, unmergedims, maplayers

export groupby, seasons, months, hours, intervals, ranges
export groupby, combine, seasons, months, hours, intervals, ranges


export @d
Expand Down
32 changes: 31 additions & 1 deletion src/groupby.jl
Original file line number Diff line number Diff line change
Expand Up @@ -447,11 +447,41 @@ end

Generate a `Vector` of half-open `IntervalSets.Interval{:closed,:open}` intervals, each of width `step(A)`
"""
intervals(rng::AbstractRange) = IntervalSets.Interval{:closed,:open}.(rng, rng .+ step(rng))
# Build one closed-open interval per element of `rng`, each starting at that
# element and ending one `step(rng)` later.
function intervals(rng::AbstractRange)
    # Shift the whole range at once so the upper bounds come from range arithmetic
    upper = rng .+ step(rng)
    return IntervalSets.Interval{:closed,:open}.(rng, upper)
end

"""
ranges(A::AbstractRange{<:Integer})

Generate a `Vector` of `UnitRange` with length `step(A)`
"""
ranges(rng::AbstractRange{<:Integer}) = map(x -> x:x+step(rng)-1, rng)


"""
combine(f::Function, gb::DimGroupByArray; dims=:)

Combine the `DimGroupByArray` using function `f` over the group dimensions.

If `dims` is given, combine only the dimensions in `dims`. The reducing function
`f` must accept a `dims` keyword.
"""
function combine(f::Function, gb::DimGroupByArray{G}; dims=:) where G
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DataAPI.combine? Since DD owns one of the types...

Copy link
Owner Author

@rafaqz rafaqz Jan 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't exist yet JuliaData/DataAPI.jl#65 ;)

# This works for both arrays and stacks
# Combine the remaining dimensions after reduction and the group dimensions
destdims = (otherdims(DD.dims(first(gb)), dims)..., DD.dims(gb)...)
# Get the output eltype
T = Base.promote_op(f, G)
# Create an output array with the combined dimensions
dest = similar(first(gb), T, destdims)
for D in DimIndices(gb)
if dims isa Colon
# Assign the reduced scalar to dest
dest[D...] = f(gb[D])
else
# Broadcast the reduced array to dest
dest[D...] .= f(gb[D]; dims)
end
end
return dest
end
26 changes: 12 additions & 14 deletions src/stack/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ for f in (:getindex, :view, :dotview)
end
end

# Return `true` when any field type of the tuple or named tuple `v` is an
# `AbstractDimArray`. As a generated function, the body sees `v` as the
# argument's type and the resulting `Bool` becomes the method body.
@generated function _any_dimarray(v::Union{NamedTuple,Tuple})
    for T in v.types
        T <: AbstractDimArray && return true
    end
    return false
end

#### setindex ####
@propagate_inbounds Base.setindex!(s::AbstractDimStack, xs, I...; kw...) =
Expand All @@ -157,22 +160,17 @@ end
hassamedims(s) ? _map_setindex!(s, xs, i; kw...) : _setindex_mixed!(s, xs, i; kw...)
# Assign the fields of the `NamedTuple` `xs` to the layers of `s`.
# An array index takes the per-layer path only when `hassamedims(s)` holds,
# otherwise it goes through `_setindex_mixed!`.
@propagate_inbounds function Base.setindex!(
    s::AbstractDimStack, xs::NamedTuple, i::AbstractArray; kw...
)
    if hassamedims(s)
        return _map_setindex!(s, xs, i; kw...)
    else
        return _setindex_mixed!(s, xs, i; kw...)
    end
end
# `DimensionIndsArrays` indices are always forwarded to each layer directly.
@propagate_inbounds function Base.setindex!(
    s::AbstractDimStack, xs::NamedTuple, i::DimensionIndsArrays; kw...
)
    return _map_setindex!(s, xs, i; kw...)
end
# Fallback for any other combination of indices: forward to each layer.
@propagate_inbounds function Base.setindex!(
    s::AbstractDimStack, xs::NamedTuple, I...; kw...
)
    return _map_setindex!(s, xs, I...; kw...)
end

@propagate_inbounds function Base.setindex!(
s::AbstractDimStack, xs::NamedTuple, I...; kw...
)
map((A, x) -> setindex!(A, x, I...; kw...), layers(s), xs)
end

_map_setindex!(s, xs, i; kw...) = map((A, x) -> setindex!(A, x, i...; kw...), layers(s), xs)
# Pair each layer of `s` with the matching entry of `xs` and call `setindex!`
# on the layer with the same indices and keywords.
function _map_setindex!(s, xs, i...; kw...)
    return map(layers(s), xs) do layer, val
        setindex!(layer, val, i...; kw...)
    end
end

_setindex_mixed!(s::AbstractDimStack, x, i::AbstractArray) =
map(A -> setindex!(A, x, DimIndices(dims(s))[i]), layers(s))
_setindex_mixed!(s::AbstractDimStack, i::Integer) =
map(A -> setindex!(A, x, DimIndices(dims(s))[i]), layers(s))
function _setindex_mixed!(s::AbstractDimStack, x, i::Colon)
map(DimIndices(dims(s))) do D
map(A -> setindex!(A, D), x, layers(s))
# Assign `xs` into a stack whose layers do not all share the same dimensions.
# `i` is first looked up in the stack-level `DimIndices`, and the result is
# then used to index every layer.
function _setindex_mixed!(s::AbstractDimStack, xs::NamedTuple, i)
    dimindex = DimIndices(dims(s))[i]
    return map(layers(s), xs) do layer, val
        layer[dimindex] = val
    end
end

Expand Down
28 changes: 27 additions & 1 deletion src/stack/stack.jl
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ Base.length(s::AbstractDimStack) = prod(size(s))
# Axes of the stack: the first axis of each of the stack's dimensions.
function Base.axes(s::AbstractDimStack)
    return map(d -> first(axes(d)), dims(s))
end
# Axis for a dimension (or dimension type): resolve it to its position first.
function Base.axes(s::AbstractDimStack, dims::DimOrDimType)
    return axes(s, dimnum(s, dims))
end
# Axis for an integer dimension position.
function Base.axes(s::AbstractDimStack, dims::Integer)
    return axes(s)[dims]
end
Base.similar(s::AbstractDimStack, args...) = maplayers(A -> similar(A, args...), s)
# Element type is the second type parameter of the stack.
function Base.eltype(::AbstractDimStack{<:Any,T}) where {T}
    return T
end
# Number of dimensions is the third type parameter of the stack.
function Base.ndims(::AbstractDimStack{<:Any,<:Any,N}) where {N}
    return N
end
# Cartesian indices are built from the stack's dimensions.
function Base.CartesianIndices(s::AbstractDimStack)
    return CartesianIndices(dims(s))
end
Expand Down Expand Up @@ -197,6 +196,33 @@ Base.get(f::Base.Callable, st::AbstractDimStack, k::Symbol) =
# Iteration step: `i` is a linear position into `DimIndices(st)`. Stop once it
# passes `length(st)`, otherwise return the indexed value and the next position.
@propagate_inbounds function Base.iterate(st::AbstractDimStack, i)
    i > length(st) && return nothing
    return (st[DimIndices(st)[i]], i + 1)
end

# Convenience methods: take the missing element type and/or dimensions from
# `s` itself and forward to a more specific `similar` method.
function Base.similar(s::AbstractDimStack)
    return similar(s, eltype(s))
end
function Base.similar(s::AbstractDimStack, dims::Tuple{Vararg{Dimension}})
    return similar(s, eltype(s), dims)
end
function Base.similar(s::AbstractDimStack, ::Type{T}) where {T}
    return similar(s, T, dims(s))
end
# Allocate a stack like `s` whose layers have element type `T`.
# Each layer receives the entries of `dims` that match its own dimensions,
# plus any entries of `dims` that the stack does not have at all.
function Base.similar(s::AbstractDimStack, ::Type{T}, dims::Tuple) where T
    # Any dims not in the stack are added to all layers.
    # `otherdims(x, query)` keeps the dims of `x` that are absent from `query`,
    # so the requested `dims` must be the first argument.
    ods = otherdims(dims, DD.dims(s))
    maplayers(s) do A
        # Original layer dims are maintained, other dims are added.
        # `DD.dims` is qualified because the `dims` argument shadows the function,
        # and `ods` is splatted so the query is a flat tuple of dimensions.
        D = DD.commondims(dims, (DD.dims(A)..., ods...))
        similar(A, T, D)
    end
end
# Allocate a stack like `s` when the element type is a `NamedTuple` type:
# each layer gets its own element type, taken from the field types of `T`.
function Base.similar(s::AbstractDimStack, ::Type{T}, dims::Tuple) where T<:NamedTuple
    # Entries of `dims` that the stack does not have are added to every layer.
    # `otherdims(x, query)` keeps the dims of `x` that are absent from `query`,
    # so the requested `dims` must be the first argument.
    ods = otherdims(dims, DD.dims(s))
    maplayers(s, _nt_types(T)) do A, Tx
        # Keep the layer's own dims plus the added ones, in the order of `dims`.
        # `ods` is splatted so the query is a flat tuple of dimensions.
        D = DD.commondims(dims, (DD.dims(A)..., ods...))
        similar(A, Tx, D)
    end
end

# Turn a `NamedTuple` type into a `NamedTuple` value holding its field types,
# e.g. `NamedTuple{(:a, :b),Tuple{Int,Float64}}` becomes `(a=Int, b=Float64)`.
# Generated so the result is built from the type parameters alone.
@generated function _nt_types(::Type{NamedTuple{K,T}}) where {K,T}
    # Tuple expression listing the field types in order
    field_types = Expr(:tuple, T.parameters...)
    # Equivalent to the expression `NamedTuple{K}((types...,))`
    return Expr(:call, Expr(:curly, :NamedTuple, :K), field_types)
end

# `merge` for AbstractDimStack and NamedTuple.
# One of the first three arguments must be an AbstractDimStack for dispatch to work.
# Merging a single stack returns it unchanged.
function Base.merge(s::AbstractDimStack)
    return s
end
Expand Down
5 changes: 5 additions & 0 deletions test/groupby.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ st = DimStack((a=A, b=A, c=A[X=1]))
mean(st[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1])
end
@test mean.(groupby(st, Ti=>month)) == manualmeans_st
combined_st = combine(mean, groupby(st, Ti=>month))
@test combined_st isa DimStack{(:a, :b, :c), @NamedTuple{a::Float64, b::Float64, c::Float64}}
@test collect(combined_st) == manualmeans_st
st[1] = (a= 1, b=2, c=3)

manualsums = mapreduce(hcat, months) do m
vcat(sum(A[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1, X=1 .. 1.5]),
Expand Down Expand Up @@ -52,6 +56,7 @@ end
@test mean.(groupby(A, Ti=>Bins(month, ranges(1:3:12)))) == manualmeans
@test mean.(groupby(A, Ti=>Bins(month, intervals(1:3:12)))) == manualmeans
@test mean.(groupby(A, Ti=>Bins(month, 4))) == manualmeans
@test DimensionalData.combine(mean, groupby(A, Ti=>Bins(month, ranges(1:3:12)))) == manualmeans
end

@testset "dimension matching groupby" begin
Expand Down
Loading