panel_fill.jl (5689B)
@testset "panel_fill" begin

    # include("./src/PanelData.jl")

    # Integer time index, one value column.
    # id=1 is observed at t=1,4 (t=2,3 absent); id=2 at t=1,2,4 (t=3 absent).
    df1 = DataFrame(
        id = [1, 1, 2, 2, 2],
        t = [1, 4, 1, 2, 4],
        a = [1, 1, 1, 0, 0],
    )

    # String ids, integer time index, three value columns.
    # "a" is observed at t=1,4; "b" at t=8,9 (no gap); "c" at t=1,2,4.
    df2 = DataFrame(
        id = ["a", "a", "b", "b", "c", "c", "c"],
        t = [1, 4, 8, 9, 1, 2, 4],
        v1 = [1, 1, 1, 6, 6, 0, 0],
        v2 = [1, 2, 3, 6, 6, 4, 5],
        v3 = [1, 5, 4, 6, 6, 15, 12.25],
    )

    # Same layout as df2 for ids "a", "b", "c" but with first-of-month dates as the
    # time index, plus id "d" observed on irregularly spaced dates.
    df3 = DataFrame(
        id = ["a", "a", "b", "b", "c", "c", "c", "d", "d", "d", "d"],
        t = [
            Date(1990, 1, 1), Date(1990, 4, 1),
            Date(1990, 8, 1), Date(1990, 9, 1),
            Date(1990, 1, 1), Date(1990, 2, 1), Date(1990, 4, 1),
            Date(1999, 11, 10), Date(1999, 12, 21), Date(2000, 2, 5), Date(2000, 4, 1),
        ],
        v1 = [1, 1, 1, 6, 6, 0, 0, 1, 4, 11, 13],
        v2 = [1, 2, 3, 6, 6, 4, 5, 1, 2, 3, 4],
        v3 = [1, 5, 4, 6, 6, 15, 12.25, 21, 22.5, 17.2, 1],
    )

    # --- test for df1
    @testset "DF1" begin
        df1_test = panel_fill(df1, :id, :t, :a;
            gap=1, method=:backwards, uniquecheck=true, flag=true)

        # values carried into the rows that panel_fill added
        @test isequal(
            select(subset(df1_test, :flag => ByRow(==(:backwards))), :a),
            DataFrame(a = [1.0, 1.0, 0.0]))

        # 5 original rows + 3 added rows
        @test nrow(df1_test) == 8
    end

    # --- test for df2 multiple variables
    @testset "DF2" begin
        df2_test = panel_fill(df2, :id, :t, [:v1, :v2, :v3];
            gap=1, method=:backwards, uniquecheck=true, flag=true)
        @test isequal(
            select(subset(df2_test, :flag => ByRow(==(:backwards))), r"v"),
            DataFrame(
                v1 = [1.0, 1.0, 0.0],
                v2 = [1.0, 1.0, 4.0],
                v3 = [1.0, 1.0, 15.0]))

        # Fill only :v1. The expected counts say 3 rows are added and :v2 is
        # non-missing only on the 7 original rows.
        df2_test = panel_fill(df2, :id, :t, :v1;
            gap=1, method=:backwards, uniquecheck=true, flag=true)
        @test nrow(df2_test) == 10
        @test count(!ismissing, df2_test.v2) == 7
    end

    # --- test for df3 multiple variables and dates
    @testset "DF3" begin
        # test with dates backwards
        df3_test = panel_fill(df3, :id, :t, [:v1, :v2, :v3];
            gap=Month(1), method=:backwards, uniquecheck=true, flag=true)
        @test isequal(
            select(subset(df3_test, :flag => ByRow(==(:backwards))), r"v"),
            DataFrame(
                v1 = [1.0, 1.0, 0.0, 4.0, 11.0],
                v2 = [1.0, 1.0, 4.0, 2.0, 3.0],
                v3 = [1.0, 1.0, 15.0, 22.5, 17.2]))

        # test in place with dates forwards and only fill some variables and not others
        df3_test = copy(df3)
        panel_fill!(df3_test, :id, :t, [:v2];
            gap=Month(1), method=:forwards, uniquecheck=true, flag=true)
        @test isequal(
            select(
                subset(df3_test, :flag => ByRow(==(:forwards)), skipmissing=true),
                :v1, :v2),
            DataFrame(v1 = repeat([missing], inner=5), v2 = [2.0, 2.0, 5.0, 3.0, 4.0]))

        # linear interpolation
        df3_test = panel_fill(df3, :id, :t, [:v1, :v2, :v3];
            gap=Month(1), method=:linear, uniquecheck=true, flag=true)
        @test isapprox(
            select(
                subset(df3_test, :flag => ByRow(==(:linear)), skipmissing=true),
                r"v"),
            DataFrame(
                v1 = [1.0, 1.0, 0.0, 7.5, 12.0],
                v2 = [1.333, 1.666, 4.5, 2.5, 3.5],
                v3 = [2.3333, 3.666, 13.625, 19.85, 9.1]),
            atol = 0.01)

        # nearest
        df3_test = panel_fill(df3, :id, :t, :v1;
            gap=Month(1), method=:nearest, uniquecheck=true, flag=true)
        @test isequal(
            select(
                subset(df3_test, :flag => ByRow(==(:nearest)), skipmissing=true),
                :v1),
            DataFrame(v1 = [1.0, 1.0, 0.0, 11.0, 13.0]))

        # -- different time periods
        df3_test = panel_fill(df3, :id, :t, [:v1, :v2, :v3];
            gap=Day(10), method=:forwards, uniquecheck=true, flag=true)
        @test nrow(df3_test) == 39
    end

end


@testset "panel_fill - flag=false" begin
    df = DataFrame(id = [1, 1, 2, 2], t = [1, 3, 1, 4], v = [10, 20, 30, 40])
    result = panel_fill(df, :id, :t, :v; gap=1, method=:backwards, flag=false)
    # `names` on a DataFrame returns strings, so the membership check must use
    # "flag"; testing for the Symbol :flag is always false and can never fail.
    @test "flag" ∉ names(result)
    @test nrow(result) > nrow(df)  # should have filled rows
end


@testset "panel_fill - invalid method" begin
    df = DataFrame(id = [1, 1], t = [1, 3], v = [10, 20])
    @test_throws Exception panel_fill(df, :id, :t, :v; gap=1, method=:invalid_method)
end


@testset "panel_fill - type mismatch" begin
    # DatePeriod gap with integer time variable
    df = DataFrame(id = [1, 1], t = [1, 3], v = [10, 20])
    @test_throws Exception panel_fill(df, :id, :t, :v; gap=Month(1))
end


@testset "panel_fill - non-unique warning" begin
    df = DataFrame(id = [1, 1, 1], t = [1, 2, 3], v = [10, 20, 30])
    # non-unique: add a duplicate (id=1, t=2) observation
    df_dup = vcat(df, DataFrame(id = [1], t = [2], v = [99]))
    # should warn about non-unique observations
    @test_logs (:warn, r"non unique"i) begin
        try
            panel_fill(df_dup, :id, :t, :v;
                gap=1, method=:backwards, uniquecheck=true, flag=true)
        catch
            # the function may error after warning due to duplicate handling;
            # we just verify the warning is emitted
        end
    end
end


@testset "panel_fill - no gaps to fill" begin
    # consecutive time values, nothing to interpolate
    df = DataFrame(id = [1, 1, 1], t = [1, 2, 3], v = [10, 20, 30])
    result = panel_fill(df, :id, :t, :v; gap=1, method=:backwards, flag=true)
    @test nrow(result) == 3  # no new rows added
    @test all(result.flag .== :original)
end