From 996e16d6628aa8d73d228b61e82b6e3e5684787f Mon Sep 17 00:00:00 2001 From: Nils Date: Fri, 10 Dec 2021 12:59:44 +0000 Subject: [PATCH] Update type hierarchy - new version --- Project.toml | 2 +- src/TreatmentPanel.jl | 149 ++++++++++++++--------------------------- src/TreatmentPanels.jl | 6 +- src/show_and_plot.jl | 12 +++- test/runtests.jl | 59 +++++++++++----- 5 files changed, 111 insertions(+), 117 deletions(-) diff --git a/Project.toml b/Project.toml index 986fa3a..c73092f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TreatmentPanels" uuid = "7885c543-3ac4-48a3-abed-7a36d7ddb69f" authors = ["Nils and contributors"] -version = "0.2.0" +version = "0.3.0" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" diff --git a/src/TreatmentPanel.jl b/src/TreatmentPanel.jl index eab6828..7d438f5 100644 --- a/src/TreatmentPanel.jl +++ b/src/TreatmentPanel.jl @@ -4,17 +4,25 @@ using DataFrames, Dates, Parameters abstract type TreatmentPanel end # Types for number of treatment units and periods -abstract type UnitTreatmentType end +# Treatment duration - this is either continuous or discontinuous abstract type TreatmentDurationType end -struct SingleUnitTreatment <: UnitTreatmentType end -struct MultiUnitSimultaneousTreatment <: UnitTreatmentType end -struct MultiUnitStaggeredTreatment <: UnitTreatmentType end -struct ContinuousTreatment <: TreatmentDurationType end -struct StartEndTreatment <: TreatmentDurationType end +struct Continuous <: TreatmentDurationType end +struct Discontinuous <: TreatmentDurationType end + +# Treatment timing - relevant only for MultiUnitTreatments, can be simultaneous or staggered +abstract type TreatmentTimingType end +struct Simultaneous{T <: TreatmentDurationType} <: TreatmentTimingType end +struct Staggered{T <: TreatmentDurationType} <: TreatmentTimingType end + +# Unit type - either single or multiple treated units +abstract type TreatmentType end +struct SingleUnitTreatment{T <: TreatmentDurationType} <: 
TreatmentType end +struct MultiUnitTreatment{T <: TreatmentTimingType} <: TreatmentType end + # BalancedPanel will have an N×T matrix of treatment assigment and outcomes """ - BalancedPanel{UnitTreatmentType, TreatmentDurationType} + BalancedPanel{TreatmentType} A TreatmentPanel in which all N treatment units are observed for the same T periods. @@ -36,7 +44,7 @@ The following table provides an overview of the types of treatment pattern suppo | **one unit** | Pair{String, Date} | Pair{String, Tuple{Date, Date}} | Pair{String}, Vector{Tuple{Date, Date}}} | | **multiple units** | Vector{Pair{String, Date}} | Vector{Pair{String, Tuple{Date, Date}}} | Vector{Pair{String}, Vector{Tuple{Date, Date}}}} | """ -@with_kw struct BalancedPanel{UTType, TDType} <: TreatmentPanel where UTType <: UnitTreatmentType where TDType <: TreatmentDurationType +@with_kw struct BalancedPanel{UTType} <: TreatmentPanel where UTType <: TreatmentType W::Union{Matrix{Bool}, Matrix{Union{Missing, Bool}}} Y::Matrix{Float64} df::DataFrame @@ -120,7 +128,7 @@ function construct_W(tas::Vector{Pair{T1, S1}}, N, T, is, ts) where T1 where S1 return W end -# Constructor for single continuous treatment - returns BalancedPanel{SingleUnitTreatment, ContinuousTreatment} +# Constructor for single continuous treatment - returns BalancedPanel{SingleUnitTreatment{Continuous}} function BalancedPanel(df::DataFrame, treatment_assignment::Pair{T1, T2}; id_var = nothing, t_var = nothing, outcome_var = nothing, sort_inplace = false) where T1 where T2 <: Union{Date, Int} @@ -151,16 +159,12 @@ function BalancedPanel(df::DataFrame, treatment_assignment::Pair{T1, T2}; W = construct_W(treatment_assignment, N, T, is, ts) # Outcome matrix - Y = zeros(size(W)) + Y = zeros(eltype(df[!, outcome_var]), size(W)) for (row, i) ∈ enumerate(is), (col, t) ∈ enumerate(ts) - try - Y[row, col] = only(df[(df[!, id_var] .== i) .& (df[!, t_var] .== t), outcome_var]) - catch ArgumentError - throw("$(nrow(df[(df[!, id_var] .== i) .& (df[!, 
t_var] .== t), :])) outcomes present in the data for unit $i in period $t") - end + Y[row, col] = only(df[(df[!, id_var] .== i) .& (df[!, t_var] .== t), outcome_var]) end - BalancedPanel{SingleUnitTreatment, ContinuousTreatment}(W, Y, df, id_var, t_var, outcome_var, ts, is) + BalancedPanel{SingleUnitTreatment{Continuous}}(W, Y, df, id_var, t_var, outcome_var, ts, is) end # Getter functions @@ -170,7 +174,7 @@ end Returns the indices of treated units in the panel, so that Y[treated_ids(x), :] returns a (Nₜᵣ×T) matrix of outcomes for treated units in all periods. """ -function treated_ids(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 +function treated_ids(x::BalancedPanel{SingleUnitTreatment{T}}) where T for i ∈ 1:size(x.Y, 1) for t ∈ 1:size(x.Y, 2) if x.W[i, t] @@ -180,12 +184,16 @@ function treated_ids(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 end end +function treated_ids(x::BalancedPanel{MultiUnitTreatment{T}}) where T + findall(vec(any(isequal(true), x.W; dims = 2))) +end + """ treated_labels(x <: BalancedPanel) Returns the labels of treated units as given by the `id_var` column in the underlying data set. """ -function treated_labels(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 +function treated_labels(x::BalancedPanel{SingleUnitTreatment{T}}) where T x.is[treated_ids(x)] end @@ -196,7 +204,7 @@ end of length Nₜᵣ, where each element is the index of the first 1 in the row of treatment matrix W corresonding to the treatment unit. """ -function first_treated_period_ids(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 <: ContinuousTreatment +function first_treated_period_ids(x::BalancedPanel{SingleUnitTreatment{T}}) where T findfirst(vec(x.W[treated_ids(x), :])) end @@ -206,7 +214,7 @@ end Returns the labels of the first treated period for each treated units, that is, a Vector{T} of length Nₜᵣ, where T is the eltype of the `t_var` column in the underlying data. 
""" -function first_treated_period_labels(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 <: ContinuousTreatment +function first_treated_period_labels(x::BalancedPanel{SingleUnitTreatment{T}}) where T x.ts[first_treated_period_ids(x)] end @@ -215,7 +223,7 @@ end Returns the number of pre-treatment periods for each treated unit. """ -function length_T₀(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 <: ContinuousTreatment +function length_T₀(x::BalancedPanel{SingleUnitTreatment{Continuous}}) first_treated_period_ids(x) - 1 end @@ -224,7 +232,7 @@ end Returns the number of treatment periods for each treated unit. """ -function length_T₁(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 <: ContinuousTreatment +function length_T₁(x::BalancedPanel{SingleUnitTreatment{Continuous}}) size(x.Y, 2) .- first_treated_period_ids(x) + 1 end @@ -235,7 +243,7 @@ end Returns the pre-treatment outcomes for the treated unit(s). For SingleUnitTreatment designs, this is a vector of length T₀, while for MultiUnitTreatment designs, it is a (Nₜᵣ×T₀) matrix """ -function get_y₁₀(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 <: ContinuousTreatment +function get_y₁₀(x::BalancedPanel{SingleUnitTreatment{Continuous}}) x.Y[treated_ids(x), 1:first_treated_period_ids(x)-1] end @@ -245,16 +253,16 @@ end Returns the post-treatment outcomes for the treated unit(s). 
For SingleUnitTreatment designs, this is a vector of length T₁, while for MultiUnitTreatment designs, it is a (Nₜᵣ×T₁) matrix """ -function get_y₁₁(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 <: ContinuousTreatment +function get_y₁₁(x::BalancedPanel{SingleUnitTreatment{Continuous}}) x.Y[x.W] end -""" +""" get_y₀₀(x <: BalancedPanel) Returns the pre-treatment outcomes for the untreated units, an (Nₖₒ×T₀) matrix """ -function get_y₀₀(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 <: ContinuousTreatment +function get_y₀₀(x::BalancedPanel{SingleUnitTreatment{Continuous}}) x.Y[Not(treated_ids(x)), 1:first_treated_period_ids(x)-1] end @@ -263,19 +271,21 @@ end Returns the post-treatment outcomes for the untreated units, an (Nₖₒ×T₁) matrix """ -function get_y₀₁(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 <: ContinuousTreatment +function get_y₀₁(x::BalancedPanel{SingleUnitTreatment{Continuous}}) x.Y[Not(treated_ids(x)), first_treated_period_ids(x):end] end """ - get_y₀₀(x <: BalancedPanel) + decompose_y(x <: BalancedPanel) Decomposes the outcome matrix Y into four elements: * Pre-treatment outcomes for treated units (y₁₀) - * Post-treatment outcomes for treated units (y₁₀) + * Post-treatment outcomes for treated units (y₁₁) * Pre-treatment outcomes for control units (y₀₀) * Post-treatment outcomes for treated units (y₀₁) + + and returns a tuple (y₁₀, y₁₁, y₀₀, y₀₁) """ function decompose_y(x) get_y₁₀(x), get_y₁₁(x), get_y₀₀(x), get_y₀₁(x) @@ -283,7 +293,7 @@ end #################################################################################################### -# Constructor for single start/end treatment - returns BalancedPanel{SingleUnitTreatment, StartEndTreatment} +# Constructor for single start/end treatment - returns BalancedPanel{SingleUnitTreatment{Discontinuous}} function BalancedPanel(df::DataFrame, treatment_assignment::Pair{T1, T2}; id_var = nothing, t_var = nothing, outcome_var = nothing, sort_inplace = false) where T1 where T2 <: 
Union{Pair{Date, Date}, Pair{Int, Int}} @@ -324,35 +334,9 @@ function BalancedPanel(df::DataFrame, treatment_assignment::Pair{T1, T2}; end end - BalancedPanel{SingleUnitTreatment, StartEndTreatment}(N, T, W, ts, is, Y) -end - -function first_treated_period_ids(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 <: StartEndTreatment - ids = Int64[] - treated_row = vec(@view x.W[treated_ids(x), :]) - for t ∈ 2:x.T - if treated_row[t] == 1 && treated_row[t-1] == 0 - push!(ids, t) - end - end - - return ids -end - -function first_treated_period_labels(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 <: StartEndTreatment - x.ts[first_treated_period_ids(x)] -end - -function length_T₀(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 <: StartEndTreatment - first(first_treated_period_ids(x)) - 1 + BalancedPanel{SingleUnitTreatment{Discontinuous}}(W, Y, df, id_var, t_var, outcome_var, ts, is) end -function length_T₁(x::BalancedPanel{SingleUnitTreatment, T2}) where T2 <: StartEndTreatment - x.T .- last(first_treated_period_ids(x)) + 1 -end - - - # Fallback method - if the length of treatment assignment is one use single treatment method above function BalancedPanel(df::DataFrame, treatment_assignment; id_var = nothing, t_var = nothing, outcome_var = nothing, @@ -395,64 +379,35 @@ function BalancedPanel(df::DataFrame, treatment_assignment; W = construct_W(treatment_assignment, N, T, is, ts) # Outcome matrix - Y = zeros(size(W)) + Y = zeros(eltype(df[!, outcome_var]), size(W)) for (row, i) ∈ enumerate(is), (col, t) ∈ enumerate(ts) - try - Y[row, col] = only(df[(df[!, id_var] .== i) .& (df[!, t_var] .== t), outcome_var]) - catch ArgumentError - throw("$(nrow(df[(df[!, id_var] .== i) .& (df[!, t_var] .== t), :])) outcomes present in the data for unit $i in period $t") - end + Y[row, col] = only(df[(df[!, id_var] .== i) .& (df[!, t_var] .== t), outcome_var]) end - # Determine UnitTreatmentType and TreatmentDurationType + # Determine TreatmentType and TreatmentDurationType 
uttype = if all(==(treatment_assignment[1][2]), last.(treatment_assignment)) - MultiUnitSimultaneousTreatment + Simultaneous else - MultiUnitStaggeredTreatment + Staggered end tdtype = if typeof(treatment_assignment) <: Pair if typeof(treatment_assignment[2]) <: Pair - StartEndTreatment + Discontinuous else - ContinuousTreatment + Continuous end else if typeof(treatment_assignment[1][2]) <: Pair - StartEndTreatment + Discontinuous else - ContinuousTreatment + Continuous end end - BalancedPanel{uttype, tdtype}(N, T, W, ts, is, Y) + BalancedPanel{MultiUnitTreatment{uttype{tdtype}}}(W, Y, df, id_var, t_var, outcome_var, ts, is) end ## UnblancedPanel - N observations but not all of them for T periods -#!# Not yet implemented - -## Utility functions -function treated_ids(x::BalancedPanel) - any.(eachrow(x.W)) -end - -function treated_labels(x::BalancedPanel) - x.is[treated_ids(x)] -end - -function first_treated_period_ids(x::BalancedPanel) - findfirst.(eachrow(x.W[treated_ids(x), :])) -end - -function first_treated_period_labels(x::BalancedPanel) - x.ts[first_treated_period_ids(x)] -end - -function length_T₀(x::BalancedPanel) - first_treated_period_ids(x) .- 1 -end - -function length_T₁(x::BalancedPanel) - x.T .- first_treated_period_ids(x) .+ 1 -end \ No newline at end of file +#!# Not yet implemented \ No newline at end of file diff --git a/src/TreatmentPanels.jl b/src/TreatmentPanels.jl index 034bb89..1b1fe0a 100644 --- a/src/TreatmentPanels.jl +++ b/src/TreatmentPanels.jl @@ -9,9 +9,11 @@ export BalancedPanel, UnbalancedPanel # Export treatment description types -export UnitTreatmentType +export TreatmentType -export SingleUnitTreatment, MultiUnitSimultaneousTreatment, MultiUnitStaggeredTreatment +export SingleUnitTreatment, MultiUnitTreatment +export TreatmentTimingType +export Staggered, Simultaneous export TreatmentDurationType -export ContinuousTreatment, StartEndTreatment +export Continuous, Discontinuous # Export utility functions export treated_ids, treated_labels, 
first_treated_period_ids, first_treated_period_labels, length_T₀, length_T₁ diff --git a/src/show_and_plot.jl b/src/show_and_plot.jl index b3f87d0..3bc6dfd 100644 --- a/src/show_and_plot.jl +++ b/src/show_and_plot.jl @@ -1,8 +1,15 @@ using RecipesBase # Custom show methods -function Base.show(io::IO, mime::MIME"text/plain", x::BalancedPanel{SingleUnitTreatment, ContinuousTreatment}) - println("Balanced Panel - single unit, single continuous treatment") +title(x::BalancedPanel{SingleUnitTreatment{Continuous}}) = "single treated unit, continuous treatment" +title(x::BalancedPanel{SingleUnitTreatment{Discontinuous}}) = "single treated unit, discontinuous treatment" +title(x::BalancedPanel{MultiUnitTreatment{Simultaneous{Continuous}}}) = "multiple treated units, simultaneous continuous treatment" +title(x::BalancedPanel{MultiUnitTreatment{Simultaneous{Discontinuous}}}) = "multiple treated units, simultaneous discontinuous treatment" +title(x::BalancedPanel{MultiUnitTreatment{Staggered{Continuous}}}) = "multiple treated units, staggered continuous treatment" +title(x::BalancedPanel{MultiUnitTreatment{Staggered{Discontinuous}}}) = "multiple treated units, staggered discontinuous treatment" + +function Base.show(io::IO, mime::MIME"text/plain", x::BalancedPanel{SingleUnitTreatment{Continuous}}) + println("Balanced Panel - $(title(x))") println(" Treated unit: $(treated_labels(x))") println(" Number of untreated units: $(size(x.Y, 1) - 1)") println(" First treatment period: $(first_treated_period_labels(x))") @@ -10,6 +17,7 @@ function Base.show(io::IO, mime::MIME"text/plain", x::BalancedPanel{SingleUnitTr println(" Number of treatment periods: $(length_T₁(x))") end +#!# TO DO - add show methods for other panel types # Plotting recipe @recipe function f(bp::BalancedPanel; kind = "treatment") diff --git a/test/runtests.jl b/test/runtests.jl index 8b6cfc7..4e8a996 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -16,23 +16,46 @@ end @testset "Single unit, continuous 
treatment" begin - int_treatment = "a" => 2 - date_treatment = "a" => Date(2001) + int_treatment = "Single Treated Unit" => 5 + date_treatment = "Single Treated Unit" => Date(2004) - test_df = DataFrame(id = ["a", "a", "b", "b"], - int_period = [1, 2, 1, 2], date_period = [Date(2000), Date(2001), Date(2000), Date(2001)], - value = 1.0:4.0) + single_continuous_data = DataFrame( + name = [fill("Single Treated Unit", 8); fill("Untreated Unit 1", 8); fill("Untreated Unit 2", 8)], + period = repeat(1:8, 3), + year_period = repeat(Date(2000):Year(1):Date(2007), 3), + outcome = vec([parse(Int, "$(i)$(t)") for t ∈ 1:8, i ∈ 1:3])) # Treatment specified as a single pair - @test BalancedPanel(test_df, int_treatment; - id_var = :id, t_var = :int_period, outcome_var = :value) isa BalancedPanel{SingleUnitTreatment, ContinuousTreatment} + sc_bp_int = BalancedPanel(single_continuous_data, int_treatment; + id_var = :name, t_var = :period, outcome_var = :outcome) + @test sc_bp_int isa BalancedPanel{SingleUnitTreatment{Continuous}} # Treatment specified as a length one vector of pairs - @test BalancedPanel(test_df, [int_treatment]; - id_var = :id, t_var = :int_period, outcome_var = :value) isa BalancedPanel{SingleUnitTreatment, ContinuousTreatment} - + + sc_bp_int2 = BalancedPanel(single_continuous_data, [int_treatment]; + id_var = :name, t_var = :period, outcome_var = :outcome) + @test sc_bp_int2 isa BalancedPanel{SingleUnitTreatment{Continuous}} + # Year treatment - @test BalancedPanel(test_df, date_treatment; - id_var = :id, t_var = :date_period, outcome_var = :value) isa BalancedPanel{SingleUnitTreatment, ContinuousTreatment} + sc_bp_year = BalancedPanel(single_continuous_data, date_treatment; + id_var = :name, t_var = :year_period, outcome_var = :outcome) + @test sc_bp_year isa BalancedPanel{SingleUnitTreatment{Continuous}} + + # Utility functions + y₁₀, y₁₁, y₀₀, y₀₁ = decompose_y(sc_bp_year) + @test y₁₀ == [11.0, 12.0, 13.0, 14.0] + @test y₁₁ == [15.0, 16.0, 17.0, 18.0] + 
@test y₀₀ == [21.0 22.0 23.0 24.0 + 31.0 32.0 33.0 34.0] + @test y₀₁ == [25.0 26.0 27.0 28.0 + 35.0 36.0 37.0 38.0] + + @test length_T₀(sc_bp_year) == 4 + @test length_T₁(sc_bp_year) == 4 + @test treated_ids(sc_bp_year) == 1 + @test treated_labels(sc_bp_year) == "Single Treated Unit" + @test first_treated_period_ids(sc_bp_year) == 5 + @test first_treated_period_ids(sc_bp_int) == 5 + @test first_treated_period_labels(sc_bp_year) == Date(2004) end @testset "Single unit, single time-limited treatment" begin @@ -45,7 +68,9 @@ end bp = BalancedPanel(test_df, treatment; id_var = :id, t_var = :period, outcome_var = :value) - @test bp isa BalancedPanel{SingleUnitTreatment, StartEndTreatment} + @test bp isa BalancedPanel{SingleUnitTreatment{Discontinuous}} + + #!# TO DO - test utility functions for this case end @testset "Single unit, multiple time-limited treatments" begin @@ -73,7 +98,9 @@ end bp = BalancedPanel(test_df, treatment; id_var = :id, t_var = :period, outcome_var = :value) - @test bp isa BalancedPanel{MultiUnitSimultaneousTreatment, ContinuousTreatment} + @test bp isa BalancedPanel{MultiUnitTreatment{Simultaneous{Continuous}}} + + #!# TO DO - test utility functions for this case end @testset "Multiple units, continuous staggered treatment" begin @@ -86,7 +113,9 @@ end bp = BalancedPanel(test_df, treatment; id_var = :id, t_var = :period, outcome_var = :value) - @test bp isa BalancedPanel{MultiUnitStaggeredTreatment, ContinuousTreatment} + @test bp isa BalancedPanel{MultiUnitTreatment{Staggered{Continuous}}} + + #!# TO DO - test utility functions for this case end #!# TO ADD - tests of plotting functionality \ No newline at end of file