reference.jl (3788B)
# ABOUTME: State and county reference data lookup from bundled FIPS lists
# ABOUTME: Provides standardization of state/county identifiers (name, abbreviation, FIPS code)

# Module-level cache for parsed reference data.
# The county cache keeps its `AbstractString` element type so that the declared return
# type of `get_county_list` stays unchanged for existing callers.
const _STATE_LIST_CACHE = Ref{Vector{Vector{String}}}()
const _COUNTY_LIST_CACHE = Ref{Vector{Vector{AbstractString}}}()
const _CACHE_INITIALIZED = Ref(false)

# Trailing designations removed from a county name before two names are compared.
const _COUNTY_SUFFIXES = [
    "COUNTY", "MUNICIPIO", "BOROUGH", "PARISH", "MUNICIPALITY", "CENSUS AREA",
]

# Built once at load time: the pattern is the same for every comparison, so there is
# no reason to rebuild it per county inside `find_county`.
const _COUNTY_SUFFIX_REGEX = Regex("\\s+(" * join(_COUNTY_SUFFIXES, "|") * ")\$")

"""
    _clean_county_name(name)

Return `name` stripped of surrounding whitespace, uppercased, and with one trailing
designation from `_COUNTY_SUFFIXES` (preceded by whitespace) removed.
"""
function _clean_county_name(name::AbstractString)
    return replace(uppercase(strip(name)), _COUNTY_SUFFIX_REGEX => "")
end

"""
    _parse_reference_file(path, columns)

Read the pipe-delimited file at `path` and return one `Vector{String}` per data row,
holding only the fields selected by `columns`, in file order.

The first line is treated as a header and dropped. Blank lines are skipped so that a
trailing newline at the end of the file does not cause an out-of-bounds field access.

# Throws
- `BoundsError` if a non-blank data row has fewer fields than `columns` requires.
"""
function _parse_reference_file(path, columns::AbstractVector{<:Integer})
    rows = Vector{String}[]
    for (lineno, line) in enumerate(eachline(path))
        lineno == 1 && continue
        isempty(strip(line)) && continue
        fields = split(line, "|")
        push!(rows, String.(fields[columns]))
    end
    return rows
end

"""
    _ensure_cache()

Populate the state and county caches on first use; later calls return immediately.

File locations are read from the `"state"` and `"county"` entries of
`get_reference_data()`. Duplicate state rows are removed.
"""
function _ensure_cache()
    _CACHE_INITIALIZED[] && return nothing
    paths = get_reference_data()

    # State rows keep fields 1, 2 and 4; county rows keep fields 1, 2, 3 and 5.
    _STATE_LIST_CACHE[] = unique(_parse_reference_file(paths["state"], [1, 2, 4]))
    _COUNTY_LIST_CACHE[] = _parse_reference_file(paths["county"], [1, 2, 3, 5])

    _CACHE_INITIALIZED[] = true
    return nothing
end

"""
    get_state_list()

Return the cached state rows, loading the reference data on first use.
"""
function get_state_list()::Vector{Vector{String}}
    _ensure_cache()
    return _STATE_LIST_CACHE[]
end

"""
    standardize_state_input(state_input::AbstractString)
    standardize_state_input(fips::Integer)
    standardize_state_input(::Nothing)

Return the state row matching the input, or `nothing` when there is no match.

- A string is stripped and compared case-insensitively against every field of each
  state row.
- An integer is zero-padded to two digits and compared against the second field of
  each row (the FIPS code).
- `nothing` is passed through unchanged.
"""
function standardize_state_input(
    state_input::AbstractString,
)::Union{Vector{String}, Nothing}
    normalized_input = uppercase(strip(state_input))
    states = get_state_list()
    matched_state = findfirst(
        state -> any(uppercase(identifier) == normalized_input for identifier in state),
        states,
    )
    return isnothing(matched_state) ? nothing : states[matched_state]
end

# Takes numeric input (handles FIPS codes)
function standardize_state_input(fips::Integer)::Union{Vector{String}, Nothing}
    fips_str = lpad(string(fips), 2, '0')
    states = get_state_list()
    matched_state = findfirst(state -> state[2] == fips_str, states)
    return isnothing(matched_state) ? nothing : states[matched_state]
end

# Handles the default case
standardize_state_input(::Nothing) = nothing


# -------------------------------------------------------------------------------------------------

"""
    get_county_list()
    get_county_list(state::AbstractString)
    get_county_list(state::Integer)

Return county rows, loading the reference data on first use.

- With no argument (or `nothing`) every cached row is returned.
- A string that parses as an integer is treated as a state FIPS code; it is zero-padded
  to two digits and compared against the second field of each row.
- Any other string is treated as a state abbreviation and compared, ignoring case and
  surrounding whitespace, against the first field of each row.
- An integer is treated as a state FIPS code.

Filtered results have duplicate rows removed.
"""
function get_county_list(state::Nothing=nothing)::Vector{Vector{AbstractString}}
    _ensure_cache()
    return _COUNTY_LIST_CACHE[]
end

function get_county_list(state::AbstractString)::Vector{Vector{AbstractString}}
    _ensure_cache()
    county_list = _COUNTY_LIST_CACHE[]

    key = strip(state)
    parsed_fips = tryparse(Int, key)
    if !isnothing(parsed_fips)
        # Re-render the parsed number so "6", "06" and " 06 " all select FIPS "06".
        fips_str = lpad(string(parsed_fips), 2, '0')
        return unique(filter(row -> row[2] == fips_str, county_list))
    end

    abbreviation = uppercase(key)
    return unique(filter(row -> uppercase(row[1]) == abbreviation, county_list))
end

function get_county_list(state::Integer)::Vector{Vector{AbstractString}}
    return get_county_list(lpad(string(state), 2, '0'))
end


"""
    standardize_county_input(county_input::Integer, state_fips)
    standardize_county_input(county_input::AbstractString, state_fips)

Return the county row of state `state_fips` matching `county_input`, or `nothing`.

An integer is zero-padded to a three-digit county FIPS code. A string is stripped and
uppercased; if it consists only of digits it is zero-padded to three digits as well, so
`"37"` and `37` find the same county. The lookup itself is done by [`find_county`](@ref).
"""
function standardize_county_input(
    county_input::Integer,
    state_fips::AbstractString,
)::Union{Vector{String}, Nothing}
    county_fips = lpad(string(county_input), 3, '0')
    return find_county(county_fips, state_fips)
end

function standardize_county_input(
    county_input::AbstractString,
    state_fips::AbstractString,
)::Union{Vector{String}, Nothing}
    stripped = uppercase(strip(county_input))
    is_numeric = !isempty(stripped) && all(isdigit, stripped)
    normalized_input = is_numeric ? lpad(stripped, 3, '0') : stripped
    return find_county(normalized_input, state_fips)
end


"""
    find_county(identifier, state_fips)

Return the first county row of state `state_fips` whose third field (county FIPS) or
fourth field (county name) equals `identifier`, or `nothing` when none does.

Both sides of the comparison are passed through `_clean_county_name`, so case,
surrounding whitespace and a trailing designation such as "County" are ignored. Only
the FIPS and name fields are compared, to avoid false positives on the state fields.
"""
function find_county(
    identifier::AbstractString,
    state_fips::AbstractString,
)::Union{Vector{String}, Nothing}
    counties = get_county_list(state_fips)
    clean_identifier = _clean_county_name(identifier)

    matched_county = findfirst(counties) do county
        _clean_county_name(county[3]) == clean_identifier ||
            _clean_county_name(county[4]) == clean_identifier
    end

    return isnothing(matched_county) ? nothing : counties[matched_county]
end