ImportFamaFrench.jl (10831B)
# --------------------------------------------------------------------------------------------------

# ImportFamaFrench.jl

# Collection of functions that import
#  financial data from Ken French's website into julia
# --------------------------------------------------------------------------------------------------



# --------------------------------------------------------------------------------------------------
# Shared helper: download a Ken French zip and extract the CSV entry
#
# Returns the tuple `(z, csv_file)` where `z` is the open `ZipFile.Reader` and `csv_file` is the
# first archive entry whose lowercased name contains "csv". The caller is responsible for
# calling `close(z)` once it has finished reading from `csv_file`.
# Throws an error (after closing the reader) when the archive holds no such entry.
# --------------------------------------------------------------------------------------------------
function _download_ff_zip(url)
    http_response = Downloads.download(url)
    z = ZipFile.Reader(http_response)
    idx = findfirst(entry -> occursin(r".*csv", lowercase(entry.name)), z.files)
    if idx === nothing
        # Do not leak the reader when there is nothing useful to hand back.
        close(z)
        error("No CSV entry found in zip archive downloaded from $url")
    end
    return (z, z.files[idx])
end


# --------------------------------------------------------------------------------------------------
# Shared helper: run `f(csv_file)` on the CSV entry of the zip at `url` and always close the
# zip reader afterwards, even when `f` throws. Returns whatever `f` returns.
# --------------------------------------------------------------------------------------------------
function _with_ff_csv(f, url)
    z, csv_file = _download_ff_zip(url)
    try
        return f(csv_file)
    finally
        close(z)
    end
end


# --------------------------------------------------------------------------------------------------
"""
    import_FF3(;frequency::Symbol=:monthly) -> DataFrame

Import Fama-French 3-factor model data directly from Ken French's data library.

Downloads and parses the Fama-French research data factors (market risk premium,
size factor, value factor, and risk-free rate) at the specified frequency.

# Arguments
- `frequency::Symbol=:monthly`: Data frequency to import. Options are:
  - `:monthly` - Monthly factor returns (default)
  - `:annual` - Annual factor returns
  - `:daily` - Daily factor returns

# Returns
- `DataFrame`: Fama-French 3-factor data with columns:
  - **Monthly**: `datem`, `mktrf`, `smb`, `hml`, `rf`
  - **Annual**: `datey`, `mktrf`, `smb`, `hml`, `rf`
  - **Daily**: `date`, `mktrf`, `smb`, `hml`, `rf`

Where:
- `mktrf`: Market return minus risk-free rate (market risk premium)
- `smb`: Small minus big (size factor)
- `hml`: High minus low (value factor)
- `rf`: Risk-free rate

# Throws
- An error if `frequency` is not one of `:daily`, `:monthly`, or `:annual`.

# Examples
```julia
# Import monthly data (default)
monthly_ff = import_FF3()

# Import annual data
annual_ff = import_FF3(frequency=:annual)

# Import daily data
daily_ff = import_FF3(frequency=:daily)
```

# Notes
- Data is sourced directly from Kenneth French's data library at Dartmouth
- Monthly and annual series are read from the same downloaded file; only the requested
  section is returned
- `datem` is converted to `MonthlyDate` and the daily `date` column to `Date`;
  the annual `datey` column is returned as read from the CSV
- For daily data, rows with a missing date or a missing first factor value are dropped
- Requires internet connection to download data

# Data Source
Kenneth R. French Data Library: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
"""
function import_FF3(;frequency::Symbol=:monthly)
    url_mth_yr = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
    url_daily  = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip"
    col_types = [String7, Float64, Float64, Float64, Float64]

    return _import_ff_factors(frequency, url_mth_yr, url_daily, col_types,
        col_names_monthly = [:datem, :mktrf, :smb, :hml, :rf],
        col_names_annual  = [:datey, :mktrf, :smb, :hml, :rf],
        col_names_daily   = [:date, :mktrf, :smb, :hml, :rf])
end
# --------------------------------------------------------------------------------------------------


# --------------------------------------------------------------------------------------------------
# Shared import logic for FF3/FF5/momentum — handles all three frequencies
#
# `url_mth_yr` is used for both :monthly and :annual, `url_daily` for :daily.
# `col_types` is forwarded to the monthly/annual CSV parsers; the `col_names_*` vectors name the
# output columns, with the date column expected in first position.
# An unknown `frequency` raises an error before anything is downloaded.
# --------------------------------------------------------------------------------------------------
function _import_ff_factors(frequency::Symbol, url_mth_yr, url_daily, col_types;
    col_names_monthly, col_names_annual, col_names_daily)

    if frequency == :annual

        # `copy` materializes the table before the zip reader is closed.
        return _with_ff_csv(url_mth_yr) do csv_file
            copy(_parse_ff_annual(csv_file, types=col_types, col_names=col_names_annual))
        end

    elseif frequency == :monthly

        df = _with_ff_csv(url_mth_yr) do csv_file
            copy(_parse_ff_monthly(csv_file, types=col_types, col_names=col_names_monthly))
        end
        date_col = col_names_monthly[1]
        transform!(df, date_col => ByRow(x -> MonthlyDate(x, "yyyymm")) => date_col)
        return df

    elseif frequency == :daily

        df = _with_ff_csv(url_daily) do csv_file
            copy(CSV.File(csv_file, header=4, footerskip=1) |> DataFrame)
        end
        rename!(df, col_names_daily)
        date_col = col_names_daily[1]
        val_col = col_names_daily[2]
        subset!(df, date_col => ByRow(!ismissing), val_col => ByRow(!ismissing))
        # Use the caller-supplied date column name rather than a hard-coded `:date`.
        transform!(df, date_col => ByRow(x -> Date(string(x), "yyyymmdd")) => date_col)
        return df

    else
        error("Frequency $frequency not known. Options are :daily, :monthly, or :annual")
    end
end
# --------------------------------------------------------------------------------------------------


# --------------------------------------------------------------------------------------------------
# Parse the annual section of a monthly/annual Ken French CSV entry.
#
# Scans for the line containing "Annual Factors", then collects the following lines that start
# with a 4-digit year until the first blank line after data has started. The collected rows are
# parsed headerless with `types` and the columns renamed to `col_names`.
# Errors when the section marker is missing or when no data row follows it.
# --------------------------------------------------------------------------------------------------
function _parse_ff_annual(zip_file; types=nothing,
    col_names=[:datey, :mktrf, :smb, :hml, :rf])

    lines = String[]
    found_annual = false

    # Read all lines from the zip file entry
    file_lines = split(String(read(zip_file)), '\n')

    for line in file_lines
        if occursin(r"Annual Factors", line)
            found_annual = true
            continue
        end

        if found_annual
            # Data lines start with a 4-digit year
            if occursin(r"^\s*\d{4}", line)
                clean_line = replace(line, r"[\r]" => "")
                push!(lines, clean_line)
            elseif !isempty(lines) && occursin(r"^\s*$", line)
                # Empty line after we've started collecting data = end of section
                break
            end
            # Otherwise skip (headers, sub-headers, blank lines before data)
        end
    end

    if !found_annual
        error("Annual Factors section not found in file")
    end
    if isempty(lines)
        error("Annual Factors section contains no data rows")
    end

    lines_buffer = IOBuffer(join(lines, "\n"))
    df = CSV.File(lines_buffer, header=false, delim=",", ntasks=1, types=types) |> DataFrame
    return rename!(df, col_names)
end
# --------------------------------------------------------------------------------------------------


# --------------------------------------------------------------------------------------------------
# Parse the monthly section of a monthly/annual Ken French CSV entry.
#
# Starts at the first line beginning with six digits (e.g. "192607") and collects every
# non-blank line up to, but excluding, the line containing "Annual Factors". The collected rows
# are parsed headerless with `types` and the columns renamed to `col_names`.
# Errors when no monthly data row is present.
# --------------------------------------------------------------------------------------------------
function _parse_ff_monthly(zip_file; types=nothing,
    col_names=[:datem, :mktrf, :smb, :hml, :rf])

    # Read all lines from the zip file entry
    file_lines = split(String(read(zip_file)), '\n')

    # Find the first data line (starts with digits, like "192607")
    skipto = findfirst(line -> occursin(r"^\s*\d{6}", line), file_lines)
    if skipto === nothing
        error("Monthly data section not found in file")
    end

    # Collect data lines until we hit "Annual Factors"
    data_lines = String[]
    for line in @view file_lines[skipto:end]
        # Stop when we hit Annual Factors section
        occursin(r"Annual Factors", line) && break
        # Skip empty lines
        occursin(r"^\s*$", line) && continue
        push!(data_lines, line)
    end

    # Data rows only: the file's own header line precedes `skipto` and is not included
    buffer = IOBuffer(join(data_lines, "\n"))

    df = CSV.File(buffer, header=false, delim=",", ntasks=1, types=types) |> DataFrame
    return rename!(df, col_names)
end
# --------------------------------------------------------------------------------------------------


# --------------------------------------------------------------------------------------------------
"""
    import_FF5(;frequency::Symbol=:monthly) -> DataFrame

Import Fama-French 5-factor model data directly from Ken French's data library.

Downloads and parses the Fama-French 5-factor research data (market risk premium,
size, value, profitability, and investment factors plus the risk-free rate).

# Arguments
- `frequency::Symbol=:monthly`: Data frequency. Options: `:monthly`, `:annual`, `:daily`

# Returns
- `DataFrame` with columns:
  - **Monthly**: `datem`, `mktrf`, `smb`, `hml`, `rmw`, `cma`, `rf`
  - **Annual**: `datey`, `mktrf`, `smb`, `hml`, `rmw`, `cma`, `rf`
  - **Daily**: `date`, `mktrf`, `smb`, `hml`, `rmw`, `cma`, `rf`

Where:
- `mktrf`: Market return minus risk-free rate
- `smb`: Small minus big (size)
- `hml`: High minus low (value)
- `rmw`: Robust minus weak (profitability)
- `cma`: Conservative minus aggressive (investment)
- `rf`: Risk-free rate

# Throws
- An error if `frequency` is not one of `:daily`, `:monthly`, or `:annual`.

# Examples
```julia
monthly_ff5 = import_FF5()
annual_ff5 = import_FF5(frequency=:annual)
daily_ff5 = import_FF5(frequency=:daily)
```

# Data Source
Kenneth R. French Data Library: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
"""
function import_FF5(;frequency::Symbol=:monthly)
    url_mth_yr = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_5_Factors_2x3_CSV.zip"
    url_daily  = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_5_Factors_2x3_daily_CSV.zip"
    col_types = [String7, Float64, Float64, Float64, Float64, Float64, Float64]

    return _import_ff_factors(frequency, url_mth_yr, url_daily, col_types,
        col_names_monthly = [:datem, :mktrf, :smb, :hml, :rmw, :cma, :rf],
        col_names_annual  = [:datey, :mktrf, :smb, :hml, :rmw, :cma, :rf],
        col_names_daily   = [:date, :mktrf, :smb, :hml, :rmw, :cma, :rf])
end
# --------------------------------------------------------------------------------------------------


# --------------------------------------------------------------------------------------------------
"""
    import_FF_momentum(;frequency::Symbol=:monthly) -> DataFrame

Import Fama-French momentum factor from Ken French's data library.

# Arguments
- `frequency::Symbol=:monthly`: Data frequency. Options: `:monthly`, `:annual`, `:daily`

# Returns
- `DataFrame` with columns:
  - **Monthly**: `datem`, `mom`
  - **Annual**: `datey`, `mom`
  - **Daily**: `date`, `mom`

# Throws
- An error if `frequency` is not one of `:daily`, `:monthly`, or `:annual`.

# Examples
```julia
monthly_mom = import_FF_momentum()
daily_mom = import_FF_momentum(frequency=:daily)
```

# Data Source
Kenneth R. French Data Library: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
"""
function import_FF_momentum(;frequency::Symbol=:monthly)
    url_mth_yr = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Momentum_Factor_CSV.zip"
    url_daily  = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Momentum_Factor_daily_CSV.zip"
    col_types = [String7, Float64]

    return _import_ff_factors(frequency, url_mth_yr, url_daily, col_types,
        col_names_monthly = [:datem, :mom],
        col_names_annual  = [:datey, :mom],
        col_names_daily   = [:date, :mom])
end
# --------------------------------------------------------------------------------------------------