CAY.jl (7219B)
1 # src/CAY.jl 2 module CAY 3 4 using DataFrames, Dates, Statistics, LinearAlgebra, CSV 5 include("FredUtils.jl") 6 using .FredUtils 7 8 export compute_cay 9 10 # FRED series IDs for CAY components 11 const SERIES = Dict( 12 :consumption => "PCEC", # Total PCE, Q, Bil $, SAAR 13 :net_worth => "TNWBSHNO", # HH Net Worth, Q, Mil $ 14 :wages => "WASCUR", # Wages & Salary Accruals, Q, Bil $ 15 :transfers => "A577RC1Q027SBEA", # Personal Current Transfer Receipts, Q 16 :other_labor => "B040RC1Q027SBEA", # Employer Pension/Insurance Contrib, Q 17 :social_ins => "A061RC1Q027SBEA", # Contributions for Govt Social Ins, Q 18 :personal_income => "PINCOME", # Personal Income, Q, Bil $ 19 :personal_taxes => "W055RC1Q027SBEA", # Personal Current Taxes, Q 20 :deflator => "PCECTPI", # PCE Price Index, Q, Index 21 :population => "B230RC0Q173SBEA", # BEA Midperiod Pop, Q, Thousands 22 ) 23 24 """ 25 download_macro_data() -> DataFrame 26 27 Download all macro components from FRED and return a quarterly DataFrame. 28 """ 29 function download_macro_data() 30 println("Checking FRED series metadata:") 31 for (name, sid) in sort(collect(SERIES), by=first) 32 info = fred_series_info(sid) 33 println(" $name ($sid): $(info.frequency), $(info.units), $(info.observation_start)–$(info.observation_end)") 34 end 35 36 raw = Dict{Symbol, DataFrame}() 37 for (name, sid) in SERIES 38 raw[name] = fred_observations(sid) 39 println("Downloaded $name: $(nrow(raw[name])) obs") 40 end 41 42 # Consumption 43 df = rename(raw[:consumption], :value => :consumption) 44 45 # Net worth: Millions -> Billions 46 nw = copy(raw[:net_worth]) 47 nw.value ./= 1000.0 48 rename!(nw, :value => :net_worth) 49 df = innerjoin(df, nw, on=:date) 50 51 # Labor income construction 52 wages = rename(raw[:wages], :value => :wages) 53 trans = rename(raw[:transfers], :value => :transfers) 54 other = rename(raw[:other_labor], :value => :other_labor) 55 si = rename(raw[:social_ins], :value => :social_ins) 56 pi = rename(raw[:personal_income], :value => :personal_income) 57 tax = rename(raw[:personal_taxes], :value => :personal_taxes) 58 59 li = innerjoin(wages, trans, other, si, pi, tax, on=:date) 60 li.li_pretax = li.wages .+ li.transfers .+ li.other_labor .- li.social_ins 61 li.labor_share = li.li_pretax ./ li.personal_income 62 li.labor_income = li.li_pretax .- li.labor_share .* li.personal_taxes 63 df = innerjoin(df, select(li, :date, :labor_income), on=:date) 64 65 # Deflator and population 66 defl = rename(raw[:deflator], :value => :deflator) 67 pop = rename(raw[:population], :value => :population) 68 df = innerjoin(df, defl, pop, on=:date) 69 70 dropmissing!(df) 71 return df 72 end 73 74 """ 75 process_components(df::DataFrame) -> DataFrame 76 77 Convert nominal aggregates to log real per-capita: c, a, y. 78 """ 79 function process_components(df::DataFrame) 80 # Convert start-of-quarter dates to end-of-quarter (to match Lettau convention) 81 out = DataFrame(date = Dates.lastdayofmonth.(df.date .+ Dates.Month(2))) 82 # Real per-capita in dollars per person: 83 # consumption/income in billions, population in thousands 84 # => multiply by 1e9 / 1e3 = 1e6 to get dollars per person 85 # deflator is index with base=100, so divide by (deflator/100) 86 out.c = log.(df.consumption ./ df.deflator .* 100 ./ df.population .* 1e6) 87 out.a = log.(df.net_worth ./ df.deflator .* 100 ./ df.population .* 1e6) 88 out.y = log.(df.labor_income ./ df.deflator .* 100 ./ df.population .* 1e6) 89 return out 90 end 91 92 """ 93 estimate_dols(c, a, y; K=8) -> NamedTuple 94 95 Stock-Watson Dynamic OLS for cointegrating regression: 96 c_t = α + β_a*a_t + β_y*y_t + Σ_{j=-K}^{K} γ_j*Δa_{t+j} + Σ_{j=-K}^{K} δ_j*Δy_{t+j} + ε_t 97 """ 98 function estimate_dols(c::Vector{Float64}, a::Vector{Float64}, y::Vector{Float64}; K::Int=8) 99 T = length(c) 100 @assert length(a) == T && length(y) == T 101 102 # Valid range: need Δx[t+j] for j in -K:K, where Δx[t] = x[t]-x[t-1] 103 # Δx exists for t=2:T, so t+j ∈ [2,T] => t ∈ [K+2, T-K] 104 valid = (K+2):(T-K) 105 n = length(valid) 106 107 ncols = 1 + 2 + 2*(2K+1) # intercept + a,y + leads/lags of Δa,Δy 108 X = zeros(n, ncols) 109 Y = c[valid] 110 111 X[:, 1] .= 1.0 112 X[:, 2] = a[valid] 113 X[:, 3] = y[valid] 114 115 col = 4 116 for j in -K:K 117 X[:, col] = [a[t+j] - a[t+j-1] for t in valid] 118 col += 1 119 end 120 for j in -K:K 121 X[:, col] = [y[t+j] - y[t+j-1] for t in valid] 122 col += 1 123 end 124 125 β = X \ Y 126 α = β[1] 127 β_a = β[2] 128 β_y = β[3] 129 130 # Compute cay for full sample 131 cay = c .- α .- β_a .* a .- β_y .* y 132 133 println("DOLS coefficients: α=$(round(α, digits=4)), β_a=$(round(β_a, digits=4)), β_y=$(round(β_y, digits=4))") 134 println(" (Lettau reference: α≈-0.441, β_a≈0.218, β_y≈0.801)") 135 136 return (; α, β_a, β_y, cay, valid) 137 end 138 139 """ 140 compute_cay(; K=8, estimation_end=nothing) -> NamedTuple 141 142 Full pipeline: download data, process, estimate DOLS. 143 If `estimation_end` is provided, DOLS is estimated only on data up to that date, 144 but cay is computed for the full sample using the estimated coefficients. 145 Returns (; data::DataFrame, beta_a, beta_y, alpha) where data has columns 146 date, c, a, y, cay. 147 """ 148 function compute_cay(; K::Int=8, 149 estimation_start::Union{Date,Nothing}=nothing, 150 estimation_end::Union{Date,Nothing}=nothing) 151 raw = download_macro_data() 152 df = process_components(raw) 153 dropmissing!(df) 154 155 # Restrict estimation sample if requested 156 est_mask = trues(nrow(df)) 157 if estimation_start !== nothing 158 est_mask .&= df.date .>= estimation_start 159 end 160 if estimation_end !== nothing 161 est_mask .&= df.date .<= estimation_end 162 end 163 164 if any(.!est_mask) 165 est_idx = findall(est_mask) 166 println("DOLS estimation sample: $(df.date[est_idx[1]]) to $(df.date[est_idx[end]]) ($(length(est_idx)) obs)") 167 result = estimate_dols(df.c[est_idx], df.a[est_idx], df.y[est_idx]; K) 168 # Recompute cay for full sample using estimated coefficients 169 cay_full = df.c .- result.α .- result.β_a .* df.a .- result.β_y .* df.y 170 else 171 result = estimate_dols(df.c, df.a, df.y; K) 172 cay_full = result.cay 173 end 174 175 data = DataFrame( 176 date = df.date, 177 c = df.c, 178 a = df.a, 179 y = df.y, 180 cay = cay_full, 181 ) 182 return (; data, beta_a=result.β_a, beta_y=result.β_y, alpha=result.α) 183 end 184 185 # When run as script 186 if abspath(PROGRAM_FILE) == @__FILE__ 187 # Estimate DOLS on pre-COVID sample (1951Q4–2019Q3) to avoid distortion from 188 # the pandemic period (large swings in transfers, consumption, and asset values). 189 # The estimated coefficients are then applied to the full sample to extend cay. 190 res = compute_cay(estimation_start=Date(1951,10,1), estimation_end=Date(2019,9,30)) 191 # Write with column names matching Lettau's format: date,c,w,y,cay 192 out = rename(res.data, :a => :w) 193 CSV.write("input/cay_computed.csv", out) 194 println("Wrote input/cay_computed.csv: $(nrow(out)) rows, $(out.date[1]) to $(out.date[end])") 195 end 196 197 end # module