WRDS.jl (5678B)
1 @testset verbose=true "WRDS tests ... downloads and build" begin 2 3 import Dates: Date, year, day 4 import LibPQ: Connection, execute 5 import Tables: columntable 6 7 wrds_conn = FinanceRoutines.open_wrds_pg( 8 get(ENV, "WRDS_USERNAME", ""), 9 get(ENV, "WRDS_PWD", "")) 10 @test typeof(wrds_conn) == Connection 11 12 date_range_test = (Date("2000-01-01"), Date("2002-01-01")) 13 date_range_test_recent = (Date("2025-01-01"), Date("2025-03-01")) 14 date_range_test_daily = (Date("2002-02-01"), Date("2002-02-05")) 15 16 # ----------------------------------------------------------------------------------------- # 17 @testset "CRSP MSF" begin 18 println("\033[1m\033[32m → running\033[0m: CRSP MSF") 19 df_msf = import_MSF(wrds_conn; date_range = date_range_test); 20 build_MSF!(df_msf; clean_cols=true); 21 22 @test minimum(skipmissing(df_msf.date)) >= Date("2000-01-01") 23 @test maximum(skipmissing(df_msf.date)) <= Date("2002-01-01") 24 @test nrow(df_msf) > 100_000 25 end 26 27 28 # ----------------------------------------------------------------------------------------- # 29 @testset "CRSP MSF V2" begin 30 println("\033[1m\033[32m → running\033[0m: CRSP MSF V2") 31 32 # new version CIZ of crsp msf 33 @test try 34 import_MSF_v2(wrds_conn; date_range = date_range_test_recent, logging_level=:info) 35 true 36 catch e 37 @error e 38 false 39 end 40 41 df_msf_v2 = import_MSF_v2(wrds_conn; date_range = date_range_test, logging_level=:info) 42 43 44 # @test subset(df_msf_v2, [:mthcaldt, :mthprcdt] => (x,y) -> isequal.(x, y) ) |> nrow > 0 45 @test subset(df_msf_v2, :mthprc => ByRow(x -> !isequal(x, abs(x))) ) |> nrow == 0 46 @test subset(df_msf_v2, :mthcap => (x -> isequal.(x, 0) ) ) |> nrow == 0 47 48 @test minimum(skipmissing(df_msf_v2.mthcaldt)) >= Date("2000-01-01") 49 @test maximum(skipmissing(df_msf_v2.mthcaldt)) <= Date("2002-01-01") 50 @test nrow(df_msf_v2) > 100_000 51 52 # discrepancy in nrow with df_msf_v2 ... 53 54 end 55 56 57 # ----------------------------------------------------------------------------------------- # 58 @testset "CRSP DSF" begin 59 println("\033[1m\033[32m → running\033[0m: CRSP DSF") 60 df_dsf = import_DSF(wrds_conn; date_range = date_range_test_daily) 61 62 @test nrow(df_dsf) > 20_000 63 @test size(unique(day.(df_dsf.date)), 1) > 1 64 @test all(map(s -> s in names(df_dsf), 65 lowercase.(["PERMNO", "DATE", "RET", "PRC", "SHROUT", "VOL"]))) 66 end 67 68 69 # ----------------------------------------------------------------------------------------- # 70 @testset "CRSP DSF V2" begin 71 println("\033[1m\033[32m → running\033[0m: CRSP DSF V2") 72 df_dsf_v2 = import_DSF_v2(wrds_conn; date_range = date_range_test_daily) 73 74 @test nrow(df_dsf_v2) > 20_000 75 @test size(unique(day.(df_dsf_v2.dlycaldt)), 1) > 1 76 @test all(map(s -> s in names(df_dsf_v2), 77 lowercase.(["PERMNO", "DLYCALDT", "DLYRET", "DLYPRC", "DLYVOL", "DLYCAP"]))) 78 end 79 80 81 82 # ----------------------------------------------------------------------------------------- # 83 @testset "Compustat FUNDA" begin 84 println("\033[1m\033[32m → running\033[0m: Compustat FUNDA") 85 df_funda = import_Funda(wrds_conn; 86 date_range = date_range_test, 87 variables=["PPENT", "NAICSH", "NAICS"]) 88 build_Funda!(df_funda; clean_cols=true) 89 90 # check basic properties of the DataFrame (mainly that it has downloaded) 91 @test minimum(skipmissing(df_funda.datey)) >= year(Date("2000-01-01")) 92 @test maximum(skipmissing(df_funda.datey)) <= year(Date("2002-01-01")) 93 @test nrow(df_funda) > 20_000 94 95 # check that the variables are downloaded and in the dataframe 96 @test all(map(s -> s in names(df_funda), lowercase.(["PPENT", "NAICSH"]))) 97 98 end 99 100 101 # ----------------------------------------------------------------------------------------- # 102 @testset "CRSP-Compustat LINK" begin 103 println("\033[1m\033[32m → running\033[0m: CRSP-Compustat LINK") 104 105 df_linktable = FinanceRoutines.import_ccm_link(wrds_conn) 106 # test on table itself 107 @test all(map(s -> s in names(df_linktable), 108 lowercase.(["GVKEY", "LINKPRIM", "LIID", "LINKTYPE", "PERMNO", "LPERMCO", 109 "LINKDT", "LINKENDDT"]))) 110 @test isempty(setdiff(unique(df_linktable.linktype), ["LU", "LC", "LS"])) 111 @test isempty(setdiff(unique(df_linktable.linkprim), ["P", "C"])) 112 113 # test on the linking 114 df_msf_v2 = import_MSF_v2(wrds_conn; date_range = date_range_test, logging_level=:info) 115 df_msf_v2 = select(df_msf_v2, :permno, :mthcaldt=>:date, :datem, :mthret=>:ret, :mthcap) 116 117 df_funda = @p import_Funda(wrds_conn; date_range = date_range_test, 118 variables=["PPENT", "NAICSH"]) |> 119 build_Funda(__; clean_cols=true) 120 121 df_msf_v2 = link_MSF(df_linktable, df_msf_v2) # merge gvkey on monthly stock file 122 @test @p df_msf_v2 |> unique(__, [:permno, :gvkey]) |> 123 groupby(__, :permno) |> combine(__, nrow) |> __.nrow |> unique |> 124 all( .<=(2) ) 125 126 df_ccm = innerjoin(df_msf_v2, df_funda, on = [:gvkey, :datey], matchmissing=:notequal) 127 @test @p df_msf_v2 |> unique(__, [:permno, :gvkey, :date, :datey]) |> 128 groupby(__, [:permno, :datey]) |> combine(__, nrow) |> __.nrow |> unique |> 129 all( .<=(12) ) 130 131 @test all(map(s -> s in names(df_ccm), lowercase.(["PPENT", "NAICSH"]))) 132 133 end 134 135 136 137 138 end