RiskPremium

Measuring the market risk premium
Log | Files | Refs

CAY.jl (7219B)


      1 # src/CAY.jl
      2 module CAY
      3 
      4 using DataFrames, Dates, Statistics, LinearAlgebra, CSV
      5 include("FredUtils.jl")
      6 using .FredUtils
      7 
      8 export compute_cay
      9 
     10 # FRED series IDs for CAY components
     11 const SERIES = Dict(
     12     :consumption     => "PCEC",              # Total PCE, Q, Bil $, SAAR
     13     :net_worth       => "TNWBSHNO",          # HH Net Worth, Q, Mil $
     14     :wages           => "WASCUR",            # Wages & Salary Accruals, Q, Bil $
     15     :transfers       => "A577RC1Q027SBEA",   # Personal Current Transfer Receipts, Q
     16     :other_labor     => "B040RC1Q027SBEA",   # Employer Pension/Insurance Contrib, Q
     17     :social_ins      => "A061RC1Q027SBEA",   # Contributions for Govt Social Ins, Q
     18     :personal_income => "PINCOME",           # Personal Income, Q, Bil $
     19     :personal_taxes  => "W055RC1Q027SBEA",   # Personal Current Taxes, Q
     20     :deflator        => "PCECTPI",           # PCE Price Index, Q, Index
     21     :population      => "B230RC0Q173SBEA",   # BEA Midperiod Pop, Q, Thousands
     22 )
     23 
     24 """
     25     download_macro_data() -> DataFrame
     26 
     27 Download all macro components from FRED and return a quarterly DataFrame.
     28 """
     29 function download_macro_data()
     30     println("Checking FRED series metadata:")
     31     for (name, sid) in sort(collect(SERIES), by=first)
     32         info = fred_series_info(sid)
     33         println("  $name ($sid): $(info.frequency), $(info.units), $(info.observation_start)–$(info.observation_end)")
     34     end
     35 
     36     raw = Dict{Symbol, DataFrame}()
     37     for (name, sid) in SERIES
     38         raw[name] = fred_observations(sid)
     39         println("Downloaded $name: $(nrow(raw[name])) obs")
     40     end
     41 
     42     # Consumption
     43     df = rename(raw[:consumption], :value => :consumption)
     44 
     45     # Net worth: Millions -> Billions
     46     nw = copy(raw[:net_worth])
     47     nw.value ./= 1000.0
     48     rename!(nw, :value => :net_worth)
     49     df = innerjoin(df, nw, on=:date)
     50 
     51     # Labor income construction
     52     wages = rename(raw[:wages], :value => :wages)
     53     trans = rename(raw[:transfers], :value => :transfers)
     54     other = rename(raw[:other_labor], :value => :other_labor)
     55     si    = rename(raw[:social_ins], :value => :social_ins)
     56     pi    = rename(raw[:personal_income], :value => :personal_income)
     57     tax   = rename(raw[:personal_taxes], :value => :personal_taxes)
     58 
     59     li = innerjoin(wages, trans, other, si, pi, tax, on=:date)
     60     li.li_pretax = li.wages .+ li.transfers .+ li.other_labor .- li.social_ins
     61     li.labor_share = li.li_pretax ./ li.personal_income
     62     li.labor_income = li.li_pretax .- li.labor_share .* li.personal_taxes
     63     df = innerjoin(df, select(li, :date, :labor_income), on=:date)
     64 
     65     # Deflator and population
     66     defl = rename(raw[:deflator], :value => :deflator)
     67     pop  = rename(raw[:population], :value => :population)
     68     df = innerjoin(df, defl, pop, on=:date)
     69 
     70     dropmissing!(df)
     71     return df
     72 end
     73 
     74 """
     75     process_components(df::DataFrame) -> DataFrame
     76 
     77 Convert nominal aggregates to log real per-capita: c, a, y.
     78 """
     79 function process_components(df::DataFrame)
     80     # Convert start-of-quarter dates to end-of-quarter (to match Lettau convention)
     81     out = DataFrame(date = Dates.lastdayofmonth.(df.date .+ Dates.Month(2)))
     82     # Real per-capita in dollars per person:
     83     # consumption/income in billions, population in thousands
     84     # => multiply by 1e9 / 1e3 = 1e6 to get dollars per person
     85     # deflator is index with base=100, so divide by (deflator/100)
     86     out.c = log.(df.consumption  ./ df.deflator .* 100 ./ df.population .* 1e6)
     87     out.a = log.(df.net_worth    ./ df.deflator .* 100 ./ df.population .* 1e6)
     88     out.y = log.(df.labor_income ./ df.deflator .* 100 ./ df.population .* 1e6)
     89     return out
     90 end
     91 
     92 """
     93     estimate_dols(c, a, y; K=8) -> NamedTuple
     94 
     95 Stock-Watson Dynamic OLS for cointegrating regression:
     96   c_t = α + β_a*a_t + β_y*y_t + Σ_{j=-K}^{K} γ_j*Δa_{t+j} + Σ_{j=-K}^{K} δ_j*Δy_{t+j} + ε_t
     97 """
     98 function estimate_dols(c::Vector{Float64}, a::Vector{Float64}, y::Vector{Float64}; K::Int=8)
     99     T = length(c)
    100     @assert length(a) == T && length(y) == T
    101 
    102     # Valid range: need Δx[t+j] for j in -K:K, where Δx[t] = x[t]-x[t-1]
    103     # Δx exists for t=2:T, so t+j ∈ [2,T] => t ∈ [K+2, T-K]
    104     valid = (K+2):(T-K)
    105     n = length(valid)
    106 
    107     ncols = 1 + 2 + 2*(2K+1)  # intercept + a,y + leads/lags of Δa,Δy
    108     X = zeros(n, ncols)
    109     Y = c[valid]
    110 
    111     X[:, 1] .= 1.0
    112     X[:, 2] = a[valid]
    113     X[:, 3] = y[valid]
    114 
    115     col = 4
    116     for j in -K:K
    117         X[:, col] = [a[t+j] - a[t+j-1] for t in valid]
    118         col += 1
    119     end
    120     for j in -K:K
    121         X[:, col] = [y[t+j] - y[t+j-1] for t in valid]
    122         col += 1
    123     end
    124 
    125     β = X \ Y
    126     α   = β[1]
    127     β_a = β[2]
    128     β_y = β[3]
    129 
    130     # Compute cay for full sample
    131     cay = c .- α .- β_a .* a .- β_y .* y
    132 
    133     println("DOLS coefficients: α=$(round(α, digits=4)), β_a=$(round(β_a, digits=4)), β_y=$(round(β_y, digits=4))")
    134     println("  (Lettau reference: α≈-0.441, β_a≈0.218, β_y≈0.801)")
    135 
    136     return (; α, β_a, β_y, cay, valid)
    137 end
    138 
    139 """
    140     compute_cay(; K=8, estimation_end=nothing) -> NamedTuple
    141 
    142 Full pipeline: download data, process, estimate DOLS.
    143 If `estimation_end` is provided, DOLS is estimated only on data up to that date,
    144 but cay is computed for the full sample using the estimated coefficients.
    145 Returns (; data::DataFrame, beta_a, beta_y, alpha) where data has columns
    146 date, c, a, y, cay.
    147 """
    148 function compute_cay(; K::Int=8,
    149         estimation_start::Union{Date,Nothing}=nothing,
    150         estimation_end::Union{Date,Nothing}=nothing)
    151     raw = download_macro_data()
    152     df = process_components(raw)
    153     dropmissing!(df)
    154 
    155     # Restrict estimation sample if requested
    156     est_mask = trues(nrow(df))
    157     if estimation_start !== nothing
    158         est_mask .&= df.date .>= estimation_start
    159     end
    160     if estimation_end !== nothing
    161         est_mask .&= df.date .<= estimation_end
    162     end
    163 
    164     if any(.!est_mask)
    165         est_idx = findall(est_mask)
    166         println("DOLS estimation sample: $(df.date[est_idx[1]]) to $(df.date[est_idx[end]]) ($(length(est_idx)) obs)")
    167         result = estimate_dols(df.c[est_idx], df.a[est_idx], df.y[est_idx]; K)
    168         # Recompute cay for full sample using estimated coefficients
    169         cay_full = df.c .- result.α .- result.β_a .* df.a .- result.β_y .* df.y
    170     else
    171         result = estimate_dols(df.c, df.a, df.y; K)
    172         cay_full = result.cay
    173     end
    174 
    175     data = DataFrame(
    176         date = df.date,
    177         c    = df.c,
    178         a    = df.a,
    179         y    = df.y,
    180         cay  = cay_full,
    181     )
    182     return (; data, beta_a=result.β_a, beta_y=result.β_y, alpha=result.α)
    183 end
    184 
    185 # When run as script
    186 if abspath(PROGRAM_FILE) == @__FILE__
    187     # Estimate DOLS on pre-COVID sample (1951Q4–2019Q3) to avoid distortion from
    188     # the pandemic period (large swings in transfers, consumption, and asset values).
    189     # The estimated coefficients are then applied to the full sample to extend cay.
    190     res = compute_cay(estimation_start=Date(1951,10,1), estimation_end=Date(2019,9,30))
    191     # Write with column names matching Lettau's format: date,c,w,y,cay
    192     out = rename(res.data, :a => :w)
    193     CSV.write("input/cay_computed.csv", out)
    194     println("Wrote input/cay_computed.csv: $(nrow(out)) rows, $(out.date[1]) to $(out.date[end])")
    195 end
    196 
    197 end # module