commit 64fb0a0d9ea414724e74cbc6cd5a46ea25251250
parent 18a4e925ce2cdf47027206aa5821f339ab1aa4f0
Author: Erik Loualiche <[email protected]>
Date: Wed, 21 May 2025 13:38:39 -0500
Merge pull request #1 from eloualiche/feature/paneldata
Feature/paneldata
Diffstat:
4 files changed, 47 insertions(+), 39 deletions(-)
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
name = "BazerData"
uuid = "9777a11d-2328-4b97-9b51-b265bb408da6"
authors = ["Erik Loualiche"]
-version = "0.7.2"
+version = "0.7.3"
[deps]
ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
diff --git a/src/PanelData.jl b/src/PanelData.jl
@@ -1,6 +1,6 @@
# ------------------------------------------------------------------------------------------
"""
- panel_fill(
+ panel_fill!(
df::DataFrame,
id_var::Symbol,
time_var::Symbol,
@@ -25,7 +25,6 @@
email me for other interpolations (anything from Interpolations.jl is possible)
- `uniquecheck::Bool = true`: check if panel is clean
- `flag::Bool = false`: flag the interpolated values
-- `merge::Bool = false`: merge the new values with the input dataset
# Returns
- `AbstractDataFrame`:
@@ -33,17 +32,15 @@
# Examples
- See tests
"""
-function panel_fill(
+function panel_fill!(
df::DataFrame,
id_var::Symbol, time_var::Symbol, value_var::Union{Symbol, Vector{Symbol}};
gap::Union{Int, DatePeriod} = 1,
method::Symbol = :backwards,
uniquecheck::Bool = true,
flag::Bool = false,
- merge::Bool = false
)
-
# prepare the data
sort!(df, [id_var, time_var])
if isa(value_var, Symbol)
@@ -136,24 +133,24 @@ function panel_fill(
transform!(df_fill, time_var_r => time_var)
end
- if merge
- if flag
- df[!, :flag] .= :original
- end
- return sort(vcat(df, df_fill, cols=:union), [id_var, time_var])
- else
- return df_fill
+ if flag
+ df[!, :flag] .= :original
end
+ append!(df, df_fill, cols=:union)
+ sort!(df, [id_var, time_var])
+
+ return df
+
end
"""
- panel_fill!(...)
+ panel_fill(...)
- Same as panel_fill but with modification in place
+ Same as panel_fill! but without modification in place
"""
-function panel_fill!(
+function panel_fill(
df::DataFrame,
id_var::Symbol, time_var::Symbol, value_var::Union{Symbol, Vector{Symbol}};
gap::Union{Int, DatePeriod} = 1,
@@ -162,12 +159,13 @@ function panel_fill!(
flag::Bool = false
)
- df_fill = panel_fill(df, id_var, time_var, value_var,
+ df_res = copy(df)
+
+ panel_fill!(df_res, id_var, time_var, value_var,
gap = gap, method = method, uniquecheck = uniquecheck, flag = flag)
- append!(df, df_fill, cols=:union)
- sort!(df, [id_var, time_var])
+
+ return df_res
- return nothing
end
diff --git a/test/UnitTests/panel_fill.jl b/test/UnitTests/panel_fill.jl
@@ -27,11 +27,12 @@
@testset "DF1" begin
df1_test = panel_fill(df1, :id, :t, :a,
gap=1, method=:backwards, uniquecheck=true, flag=true)
- @test isequal(select(df1_test, :a),
- DataFrame(a = [0.0, 1.0, 1.0]))
+ @test isequal(
+ select(subset(df1_test, :flag => ByRow(==(:backwards))), :a),
+ DataFrame(a = [1.0, 1.0, 0.0]))
# TODO clean up this test
df1_test = panel_fill(df1, :id, :t, :a,
- gap=1, method=:backwards, uniquecheck=true, flag=true, merge=true)
+ gap=1, method=:backwards, uniquecheck=true, flag=true)
@test isequal(nrow(df1_test), 8)
end
@@ -39,11 +40,12 @@
@testset "DF2" begin
df2_test = panel_fill(df2, :id, :t, [:v1, :v2, :v3],
gap=1, method=:backwards, uniquecheck=true, flag=true)
- @test isequal(select(df2_test, r"v"),
- DataFrame(v1 = [0.0, 1.0, 1.0], v2 = [4.0, 1.0, 1.], v3 = [15.0, 1.0, 1.0]))
+ @test isequal(
+ select(subset(df2_test, :flag => ByRow(==(:backwards))), r"v"),
+ DataFrame(v1 = [1.0, 1.0, 0.0], v2 = [1.0, 1.0, 4.0], v3 = [1.0, 1.0, 15.0]))
df2_test = panel_fill(df2, :id, :t, :v1,
- gap=1, method=:backwards, uniquecheck=true, flag=true, merge=true)
+ gap=1, method=:backwards, uniquecheck=true, flag=true)
@test isequal((nrow(df2_test), nrow(filter(:v2 => !ismissing, df2_test))),
(10, 7))
end
@@ -54,9 +56,11 @@
# test with dates backwards
df3_test = panel_fill(df3, :id, :t, [:v1, :v2, :v3],
gap=Month(1), method=:backwards, uniquecheck=true, flag=true)
- @test isequal(select(df3_test, :v1, :v2, :v3),
- DataFrame(v1 = [4.0, 11.0, 0.0, 1.0, 1.0], v2 = [2.0, 3.0, 4.0, 1.0, 1.0],
- v3 = [22.5, 17.2, 15.0, 1.0, 1.0]))
+ @test isequal(
+ select(subset(df3_test, :flag => ByRow(==(:backwards))), r"v"),
+ DataFrame(v1 = [1.0, 1.0, 0.0, 4.0, 11.0],
+ v2 = [1.0, 1.0, 4.0, 2.0, 3.0],
+ v3 = [1.0, 1.0, 15.0, 22.5, 17.2]))
# test in place with dates forwards and only fill some variables and not others
df3_test = copy(df3)
@@ -68,16 +72,21 @@
# linear interpolation
df3_test = panel_fill(df3, :id, :t, [:v1, :v2, :v3],
- gap=Month(1), method=:linear, uniquecheck=true, flag=true, merge=false)
- @test isapprox(select(df3_test, r"v"),
- DataFrame(v1 = [7.5 , 12.0, 0.0, 1.0, 1.0], v2 = [2.5, 3.5, 4.5, 1.333, 1.666],
- v3 = [19.85, 9.1, 13.625, 2.3333, 3.666]),
- atol = 0.01)
+ gap=Month(1), method=:linear, uniquecheck=true, flag=true)
+ @test isapprox(
+ select(subset(df3_test, :flag => ByRow(==(:linear)), skipmissing=true), r"v") ,
+ DataFrame(
+ v1 = [1.0, 1.0, 0.0, 7.5 , 12.0],
+ v2 = [1.333, 1.666, 4.5, 2.5, 3.5],
+ v3 = [2.3333, 3.666, 13.625, 19.85, 9.1]),
+ atol = 0.01)
# nearest
df3_test = panel_fill(df3, :id, :t, :v1,
- gap=Month(1), method=:nearest, uniquecheck=true, flag=true, merge=false)
- @test isequal(select(df3_test, :v1), DataFrame(v1 = [11.0, 13.0, 0.0, 1.0, 1.0]))
+ gap=Month(1), method=:nearest, uniquecheck=true, flag=true)
+ @test isequal(
+ select(subset(df3_test, :flag => ByRow(==(:nearest)), skipmissing=true), :v1),
+ DataFrame(v1 = [1.0, 1.0, 0.0, 11.0, 13.0]))
# TODO clean up these tests
@@ -86,7 +95,7 @@
# panel_fill(df3, :id, :t, [:v1, :v2, :v3],
# gap=Month(2), method=:backwards, uniquecheck=true, flag=true, merge=true)
df3_test = panel_fill(df3, :id, :t, [:v1, :v2, :v3],
- gap=Day(10), method=:forwards, uniquecheck=true, flag=true, merge=true)
+ gap=Day(10), method=:forwards, uniquecheck=true, flag=true)
@test isequal(nrow(df3_test) , 39)
end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -10,8 +10,9 @@ import StatsBase: quantile, Weights, sample
using StreamToString
const testsuite = [
- "tabulate", "xtile", "winsorize", "panel_fill",
- "timeshift"
+ "tabulate",
+ "xtile", "winsorize",
+ "panel_fill", "timeshift"
]
ENV["DATADEPS_ALWAYS_ACCEPT"] = true # for data loading of PalmerPenguins