FinanceRoutines.jl

Financial data routines for Julia
Log | Files | Refs | README | LICENSE

commit 98848cbd9132330f6dd6abb80fcbffb6141ad731
parent 3032dc570dd21e38320ad173cfd9b1cbe8618e64
Author: Erik Loualiche <[email protected]>
Date:   Sun, 22 Mar 2026 10:30:57 -0500

Generalize FF parsing with shared helpers for FF5 reuse

- Extract _download_ff_zip and _import_ff_factors shared helpers
- Add col_names parameter to _parse_ff_annual and _parse_ff_monthly
- Fix daily filter to use subset! with generic column names
- import_FF3 now delegates to _import_ff_factors

Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

Diffstat:
Msrc/ImportFamaFrench.jl | 117++++++++++++++++++++++++++++++++++++++++++-------------------------------------
1 file changed, 62 insertions(+), 55 deletions(-)

diff --git a/src/ImportFamaFrench.jl b/src/ImportFamaFrench.jl @@ -9,10 +9,14 @@ # -------------------------------------------------------------------------------------------------- -# List of exported functions -# export import_FF3 # read monthly FF3 +# Shared helper: download a Ken French zip and extract the CSV entry # -------------------------------------------------------------------------------------------------- - +function _download_ff_zip(url) + http_response = Downloads.download(url) + z = ZipFile.Reader(http_response) + csv_file = filter(x -> match(r".*csv", lowercase(x.name)) !== nothing, z.files)[1] + return (z, csv_file) +end # -------------------------------------------------------------------------------------------------- @@ -21,13 +25,13 @@ Import Fama-French 3-factor model data directly from Ken French's data library. -Downloads and parses the Fama-French research data factors (market risk premium, +Downloads and parses the Fama-French research data factors (market risk premium, size factor, value factor, and risk-free rate) at the specified frequency. # Arguments - `frequency::Symbol=:monthly`: Data frequency to import. Options are: - `:monthly` - Monthly factor returns (default) - - `:annual` - Annual factor returns + - `:annual` - Annual factor returns - `:daily` - Daily factor returns # Returns @@ -37,7 +41,7 @@ size factor, value factor, and risk-free rate) at the specified frequency. Where: - `mktrf`: Market return minus risk-free rate (market risk premium) -- `smb`: Small minus big (size factor) +- `smb`: Small minus big (size factor) - `hml`: High minus low (value factor) - `rf`: Risk-free rate @@ -64,110 +68,113 @@ daily_ff = import_FF3(frequency=:daily) Kenneth R. French Data Library: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html """ function import_FF3(;frequency::Symbol=:monthly) + url_mth_yr = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip" + url_daily = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip" + col_types = [String7, Float64, Float64, Float64, Float64] + + return _import_ff_factors(frequency, url_mth_yr, url_daily, col_types, + col_names_monthly = [:datem, :mktrf, :smb, :hml, :rf], + col_names_annual = [:datey, :mktrf, :smb, :hml, :rf], + col_names_daily = [:date, :mktrf, :smb, :hml, :rf]) +end +# -------------------------------------------------------------------------------------------------- - ff_col_classes = [String7, Float64, Float64, Float64, Float64]; - url_FF_mth_yr = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip" - url_FF_daily = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip" - # ---------------------------------------------------------------------------------------------- - if frequency==:annual +# -------------------------------------------------------------------------------------------------- +# Shared import logic for FF3/FF5/momentum — handles all three frequencies +# -------------------------------------------------------------------------------------------------- +function _import_ff_factors(frequency::Symbol, url_mth_yr, url_daily, col_types; + col_names_monthly, col_names_annual, col_names_daily) + + if frequency == :annual - http_response = Downloads.download(url_FF_mth_yr); - z = ZipFile.Reader(http_response) ; - a_file_in_zip = filter(x -> match(r".*csv", lowercase(x.name)) != nothing, z.files)[1] - df_FF3 = copy(_parse_ff_annual(a_file_in_zip, types=ff_col_classes)) + z, csv_file = _download_ff_zip(url_mth_yr) + df = copy(_parse_ff_annual(csv_file, types=col_types, col_names=col_names_annual)) close(z) - return df_FF3 + return df - # ---------------------------------------------------------------------------------------------- - elseif frequency==:monthly + elseif frequency == :monthly - http_response = Downloads.download(url_FF_mth_yr); - z = ZipFile.Reader(http_response) ; - a_file_in_zip = filter(x -> match(r".*csv", lowercase(x.name)) != nothing, z.files)[1] - df_FF3 = copy(_parse_ff_monthly(a_file_in_zip, types=ff_col_classes)) + z, csv_file = _download_ff_zip(url_mth_yr) + df = copy(_parse_ff_monthly(csv_file, types=col_types, col_names=col_names_monthly)) close(z) - - transform!(df_FF3, :datem => ByRow(x -> MonthlyDate(x, "yyyymm")) => :datem) - return df_FF3 - - - # ---------------------------------------------------------------------------------------------- - elseif frequency==:daily - - http_response = Downloads.download(url_FF_daily); - z = ZipFile.Reader(http_response) ; - a_file_in_zip = filter(x -> match(r".*csv", lowercase(x.name)) != nothing, z.files)[1] - df_FF3 = copy(CSV.File(a_file_in_zip, header=4, footerskip=1) |> DataFrame); + transform!(df, col_names_monthly[1] => ByRow(x -> MonthlyDate(x, "yyyymm")) => col_names_monthly[1]) + return df + + elseif frequency == :daily + + z, csv_file = _download_ff_zip(url_daily) + df = copy(CSV.File(csv_file, header=4, footerskip=1) |> DataFrame) close(z) - rename!(df_FF3, [:date, :mktrf, :smb, :hml, :rf]); - df_FF3 = @p df_FF3 |> filter(.!ismissing.(_.date) && .!ismissing.(_.mktrf)) - transform!(df_FF3, :date => ByRow(x -> Date(string(x), "yyyymmdd") ) => :date) - return df_FF3 + rename!(df, col_names_daily) + date_col = col_names_daily[1] + val_col = col_names_daily[2] + subset!(df, date_col => ByRow(!ismissing), val_col => ByRow(!ismissing)) + transform!(df, :date => ByRow(x -> Date(string(x), "yyyymmdd")) => :date) + return df - # ---------------------------------------------------------------------------------------------- else error("Frequency $frequency not known. Options are :daily, :monthly, or :annual") end - end # -------------------------------------------------------------------------------------------------- # -------------------------------------------------------------------------------------------------- -function _parse_ff_annual(zip_file; types=nothing) +function _parse_ff_annual(zip_file; types=nothing, + col_names=[:datey, :mktrf, :smb, :hml, :rf]) lines = String[] found_annual = false - + # Read all lines from the zip file entry file_lines = split(String(read(zip_file)), '\n') - + for line in file_lines if occursin(r"Annual Factors", line) found_annual = true continue end - + if found_annual # Skip the header line that comes after "Annual Factors" if occursin(r"Mkt-RF|SMB|HML|RF", line) continue end - + if occursin(r"^\s*$", line) || occursin(r"[A-Za-z]{3,}", line[1:min(10, length(line))]) - if !occursin(r"^\s*$", line) && !occursin(r"^\s*\d{4}", line) # Added \s* + if !occursin(r"^\s*$", line) && !occursin(r"^\s*\d{4}", line) break end continue end - - if occursin(r"^\s*\d{4}", line) - clean_line = replace(line, r"[\r]" => "") + + if occursin(r"^\s*\d{4}", line) + clean_line = replace(line, r"[\r]" => "") push!(lines, clean_line) end end end - + if !found_annual error("Annual Factors section not found in file") end - + lines_buffer = IOBuffer(join(lines, "\n")) return CSV.File(lines_buffer, header=false, delim=",", ntasks=1, types=types) |> DataFrame |> - df -> rename!(df, [:datey, :mktrf, :smb, :hml, :rf]) + df -> rename!(df, col_names) end # -------------------------------------------------------------------------------------------------- # -------------------------------------------------------------------------------------------------- -function _parse_ff_monthly(zip_file; types=nothing) +function _parse_ff_monthly(zip_file; types=nothing, + col_names=[:datem, :mktrf, :smb, :hml, :rf]) # Read all lines from the zip file entry file_lines = split(String(read(zip_file)), '\n') # Find the first data line (starts with digits, like "192607") - # instead of hardcoding a skip count that breaks if the header changes skipto = 1 for (i, line) in enumerate(file_lines) if occursin(r"^\s*\d{6}", line) @@ -200,7 +207,7 @@ function _parse_ff_monthly(zip_file; types=nothing) buffer = IOBuffer(join(data_lines, "\n")) return CSV.File(buffer, header=false, delim=",", ntasks=1, types=types) |> DataFrame |> - df -> rename!(df, [:datem, :mktrf, :smb, :hml, :rf]) + df -> rename!(df, col_names) end # --------------------------------------------------------------------------------------------------