FinanceRoutines.jl

Financial data routines for Julia
Log | Files | Refs | README | LICENSE

WRDS.jl (5678B)


      1 @testset verbose=true "WRDS tests ... downloads and build" begin
      2 
      3     import Dates: Date, year, day
      4     import LibPQ: Connection, execute
      5     import Tables: columntable
      6 
      7     wrds_conn = FinanceRoutines.open_wrds_pg(
      8         get(ENV, "WRDS_USERNAME", ""),
      9         get(ENV, "WRDS_PWD", ""))
     10     @test typeof(wrds_conn) == Connection
     11 
     12     date_range_test        = (Date("2000-01-01"), Date("2002-01-01"))
     13     date_range_test_recent = (Date("2025-01-01"), Date("2025-03-01"))
     14     date_range_test_daily  = (Date("2002-02-01"), Date("2002-02-05"))
     15 
     16     # ----------------------------------------------------------------------------------------- #
     17     @testset "CRSP MSF" begin
     18         println("\033[1m\033[32m    → running\033[0m: CRSP MSF")
     19         df_msf = import_MSF(wrds_conn; date_range = date_range_test);
     20         build_MSF!(df_msf; clean_cols=true);
     21 
     22         @test minimum(skipmissing(df_msf.date)) >= Date("2000-01-01")
     23         @test maximum(skipmissing(df_msf.date)) <= Date("2002-01-01")
     24         @test nrow(df_msf) > 100_000
     25     end
     26 
     27 
     28     # ----------------------------------------------------------------------------------------- #
     29     @testset "CRSP MSF V2" begin
     30         println("\033[1m\033[32m    → running\033[0m: CRSP MSF V2")
     31 
     32         # new version CIZ of crsp msf
     33         @test try
     34             import_MSF_v2(wrds_conn; date_range = date_range_test_recent, logging_level=:info)
     35             true
     36         catch e
     37             @error e
     38             false
     39         end
     40 
     41         df_msf_v2 = import_MSF_v2(wrds_conn; date_range = date_range_test, logging_level=:info)
     42 
     43 
     44         # @test subset(df_msf_v2, [:mthcaldt, :mthprcdt] => (x,y) -> isequal.(x, y) ) |> nrow > 0
     45         @test subset(df_msf_v2, :mthprc => ByRow(x -> !isequal(x, abs(x))) ) |> nrow == 0
     46         @test subset(df_msf_v2, :mthcap => (x -> isequal.(x, 0) ) ) |> nrow == 0
     47 
     48         @test minimum(skipmissing(df_msf_v2.mthcaldt)) >= Date("2000-01-01")
     49         @test maximum(skipmissing(df_msf_v2.mthcaldt)) <= Date("2002-01-01")
     50         @test nrow(df_msf_v2) > 100_000
     51 
     52         # discrepancy in nrow with df_msf_v2 ...
     53 
     54     end
     55 
     56 
     57     # ----------------------------------------------------------------------------------------- #
     58     @testset "CRSP DSF" begin
     59         println("\033[1m\033[32m    → running\033[0m: CRSP DSF")
     60         df_dsf = import_DSF(wrds_conn; date_range = date_range_test_daily)
     61 
     62         @test nrow(df_dsf) > 20_000
     63         @test size(unique(day.(df_dsf.date)), 1) > 1
     64         @test all(map(s -> s in names(df_dsf),
     65             lowercase.(["PERMNO", "DATE", "RET", "PRC", "SHROUT", "VOL"])))
     66     end
     67 
     68 
     69     # ----------------------------------------------------------------------------------------- #
     70     @testset "CRSP DSF V2" begin
     71         println("\033[1m\033[32m    → running\033[0m: CRSP DSF V2")
     72         df_dsf_v2 = import_DSF_v2(wrds_conn; date_range = date_range_test_daily)
     73 
     74         @test nrow(df_dsf_v2) > 20_000
     75         @test size(unique(day.(df_dsf_v2.dlycaldt)), 1) > 1
     76         @test all(map(s -> s in names(df_dsf_v2),
     77             lowercase.(["PERMNO", "DLYCALDT", "DLYRET", "DLYPRC", "DLYVOL", "DLYCAP"])))
     78     end
     79 
     80 
     81 
     82     # ----------------------------------------------------------------------------------------- #
     83     @testset "Compustat FUNDA" begin
     84         println("\033[1m\033[32m    → running\033[0m: Compustat FUNDA")
     85         df_funda = import_Funda(wrds_conn;
     86             date_range = date_range_test,
     87             variables=["PPENT", "NAICSH", "NAICS"])
     88         build_Funda!(df_funda; clean_cols=true)
     89 
     90         # check basic properties of the DataFrame (mainly that it has downloaded)
     91         @test minimum(skipmissing(df_funda.datey)) >= year(Date("2000-01-01"))
     92         @test maximum(skipmissing(df_funda.datey)) <= year(Date("2002-01-01"))
     93         @test nrow(df_funda) > 20_000
     94 
     95         # check that the variables are downloaded and in the dataframe
     96         @test all(map(s -> s in names(df_funda), lowercase.(["PPENT", "NAICSH"])))
     97 
     98     end
     99 
    100 
    101     # ----------------------------------------------------------------------------------------- #
    102     @testset "CRSP-Compustat LINK" begin
    103         println("\033[1m\033[32m    → running\033[0m: CRSP-Compustat LINK")
    104 
    105         df_linktable = FinanceRoutines.import_ccm_link(wrds_conn)
    106         # test on table itself
    107         @test all(map(s -> s in names(df_linktable),
    108                   lowercase.(["GVKEY", "LINKPRIM", "LIID", "LINKTYPE", "PERMNO", "LPERMCO",
    109                               "LINKDT", "LINKENDDT"])))
    110         @test isempty(setdiff(unique(df_linktable.linktype), ["LU", "LC", "LS"]))
    111         @test isempty(setdiff(unique(df_linktable.linkprim), ["P", "C"]))
    112 
    113         # test on the linking
    114         df_msf_v2 = import_MSF_v2(wrds_conn; date_range = date_range_test, logging_level=:info)
    115         df_msf_v2 = select(df_msf_v2, :permno, :mthcaldt=>:date, :datem, :mthret=>:ret, :mthcap)
    116 
    117         df_funda  =  @p import_Funda(wrds_conn; date_range = date_range_test,
    118             variables=["PPENT", "NAICSH"]) |>
    119             build_Funda(__; clean_cols=true)
    120 
    121         df_msf_v2 = link_MSF(df_linktable, df_msf_v2) # merge gvkey on monthly stock file
    122         @test @p df_msf_v2 |> unique(__, [:permno, :gvkey]) |>
    123             groupby(__, :permno) |> combine(__, nrow) |> __.nrow |> unique |>
    124             all( .<=(2) )
    125 
    126         df_ccm = innerjoin(df_msf_v2, df_funda, on = [:gvkey, :datey], matchmissing=:notequal)
    127         @test @p df_msf_v2 |> unique(__, [:permno, :gvkey, :date, :datey]) |>
    128             groupby(__, [:permno, :datey]) |> combine(__, nrow) |> __.nrow |> unique |>
    129             all( .<=(12) )
    130 
    131         @test all(map(s -> s in names(df_ccm), lowercase.(["PPENT", "NAICSH"])))
    132 
    133     end
    134 
    135 
    136 
    137 
    138 end