commit b4f0d3fe2688761fbce881deaf361b724b946dd5
parent 66c9c87654f46370f9e28f48a6b5aa0d1a309ef5
Author: Erik Loualiche <[email protected]>
Date: Sat, 22 Feb 2025 21:45:13 -0600
first code commit ... some divisions work well
Diffstat:
9 files changed, 548 insertions(+), 4 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
/Manifest.toml
+.DS_Store
diff --git a/src/TigerFetch.jl b/src/TigerFetch.jl
@@ -1,5 +1,34 @@
module TigerFetch
-# Write your package code here.
-end
+# --------------------------------------------------------------------------------------------------
+import Comonicon: @cast, @main
+import Downloads
+import Pkg
+using Infiltrator
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
+include("artifacts.jl")
+include("geotypes.jl") # Internal type system
+include("reference.jl")
+include("download.jl")
+include("cli.jl")
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
+# Exports
+# `download_shapefile` is deliberately not exported for now: it relies on internal types that we might not want to expose.
+# Export CLI function
+export tigerfetch
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
+
+
+
+end # module
diff --git a/src/artifacts.jl b/src/artifacts.jl
@@ -0,0 +1,58 @@
+using Pkg.Artifacts
+
+"""
+    create_artifacts()
+
+Create the `package_assets` artifact from the reference files in the package's
+`assets` directory and bind it in the package's `Artifacts.toml`.
+
+The files `national_county2020.txt` and `national_state2020.txt` are copied into a
+fresh artifact, which is then bound under the name `package_assets`, overwriting any
+existing binding. Run this once to set up the artifact.
+"""
+function create_artifacts()
+    package_root = joinpath(@__DIR__, "..")
+    # Assumes the files live in a directory named "assets" at the package root
+    source_dir = joinpath(package_root, "assets")
+    asset_files = ("national_county2020.txt", "national_state2020.txt")
+
+    # `create_artifact` hands the closure a fresh directory to populate
+    tree_hash = create_artifact() do dir
+        for file in asset_files
+            cp(joinpath(source_dir, file), joinpath(dir, file))
+        end
+    end
+
+    # Bind in the package's own Artifacts.toml (the file `artifact_dir()` reads) rather
+    # than in whatever "Artifacts.toml" is relative to the current working directory.
+    bind_artifact!(
+        joinpath(package_root, "Artifacts.toml"),  # Artifacts.toml at the package root
+        "package_assets",                          # Name of the artifact
+        tree_hash;                                 # Hash from create_artifact
+        force=true                                 # Overwrite if exists
+    )
+end
+
+"""
+    artifact_dir()
+
+Return the directory of the `package_assets` artifact.
+
+The artifact hash is looked up in the package's `Artifacts.toml`; the artifact is then
+ensured to be installed and its path is returned. Raises an error when the TOML file
+has no `package_assets` entry.
+"""
+function artifact_dir()
+    toml_path = joinpath(@__DIR__, "..", "Artifacts.toml")
+    tree_hash = artifact_hash("package_assets", toml_path)
+
+    # Guard clause: without a hash there is nothing to install or locate
+    isnothing(tree_hash) && error("Could not find package_assets entry in Artifacts.toml")
+
+    ensure_artifact_installed("package_assets", toml_path)
+    return artifact_path(tree_hash)
+end
+
+
+"""
+    get_reference_data()
+
+Return a `Dict` mapping `"county"` and `"state"` to the paths of the corresponding
+reference files inside the artifact directory.
+"""
+function get_reference_data()
+    root = artifact_dir()
+    county_file = joinpath(root, "national_county2020.txt")
+    state_file = joinpath(root, "national_state2020.txt")
+    return Dict("county" => county_file, "state" => state_file)
+end
diff --git a/src/cli.jl b/src/cli.jl
@@ -0,0 +1,75 @@
+
+# Lookup table from the lower-case CLI `type` argument to the geography type to build.
+const GEOGRAPHY_TYPES = Dict{String, DataType}(
+    "state" => State,
+    "county" => County,
+    "cousub" => CountySubdivision,
+    "tract" => Tract,
+    "areawater" => AreaWater,
+)
+
+"""
+Download TIGER/Line shapefiles.
+
+# Arguments
+
+- `type`: Geography type (state, county, cousub, tract, areawater)
+- `year`: Data year (default: 2024)
+
+# Options
+
+- `--state`: State identifier (name, abbreviation, or FIPS)
+- `--county`: County identifier (name or FIPS, requires --state)
+- `--output`: Output directory (default: current directory)
+- `--force`: Override existing files
+
+# Examples
+tigerfetch state 2024
+tigerfetch cousub 2024 --state CA
+tigerfetch tract 2024 --state "New York"
+
+"""
+@main function tigerfetch(
+    type::String, year::Int=2024;
+    state::String="",
+    county::String="",
+    output::String=pwd(),
+    force::Bool=false)
+
+    type_lower = lowercase(type)
+    if !haskey(GEOGRAPHY_TYPES, type_lower)
+        throw(ArgumentError("Invalid type. Choose from: $(join(keys(GEOGRAPHY_TYPES), ", "))"))
+    end
+
+    # Build the geography instance; its scope is inherent in the type
+    geo = GEOGRAPHY_TYPES[type_lower](year)
+
+    # Empty strings are the CLI's "not provided" marker; `download_shapefile` takes `nothing`
+    state_arg = isempty(state) ? nothing : state
+    county_arg = isempty(county) ? nothing : county
+
+    # Dispatch based on the type's hierarchy
+    if geo isa NationalGeography
+        if !isempty(state) || !isempty(county)
+            @warn "State/county options ignored for national-level data"
+        end
+        download_shapefile(geo; output_dir=output, force=force)
+
+    elseif geo isa StateGeography
+        if !isempty(county)
+            @warn "County option ignored for state-level data"
+        end
+        # `download_shapefile` itself warns when no state is given, so it is not repeated here
+        download_shapefile(geo; state=state_arg, output_dir=output, force=force)
+
+    elseif geo isa CountyGeography
+        # Reject the invalid combination first, before anything is logged or downloaded
+        if !isempty(county) && isempty(state)
+            throw(ArgumentError("--county option requires --state to be specified"))
+        end
+        # `download_shapefile` itself warns when no state is given, so it is not repeated here
+        download_shapefile(geo; state=state_arg, county=county_arg, output_dir=output, force=force)
+    end
+end
diff --git a/src/download.jl b/src/download.jl
@@ -0,0 +1,155 @@
+
+
+# --------------------------------------------------------------------------------------------------
+# National scope (States, Counties nationally)
+"""
+    download_shapefile(geo::NationalGeography; output_dir=pwd(), force=false)
+
+Download the single nation-wide archive `tl_<year>_us_<name>.zip` for `geo` into
+`output_dir` (created if needed).
+
+An already existing file is kept, and its path returned, unless `force` is `true`.
+
+# Returns
+The path of the existing or freshly downloaded zip file.
+
+# Throws
+Any exception raised while creating the directory or downloading is logged and rethrown.
+"""
+function download_shapefile(
+    geo::T;
+    output_dir::String=pwd(),
+    force::Bool=false) where {T <: NationalGeography}
+
+    name = tiger_name(geo)
+    filename = "tl_$(geo.year)_us_$(lowercase(name)).zip"
+
+    url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(name)/" * filename
+    output_path = joinpath(output_dir, filename)
+
+    if isfile(output_path) && !force
+        @info "File exists" path=output_path
+        return output_path
+    end
+
+    try
+        @info "Downloading $(description(geo))" url=url
+        mkpath(output_dir)
+        Downloads.download(url, output_path)
+        return output_path
+    catch e
+        @error "Download failed" exception=e
+        # Bare `rethrow()` re-raises the current exception with its original backtrace
+        rethrow()
+    end
+end
+# --------------------------------------------------------------------------------------------------
+#
+#
+# --------------------------------------------------------------------------------------------------
+# State scope (CountySubdivisions, Places)
+"""
+    download_shapefile(geo::StateGeography; state=nothing, output_dir=pwd(), force=false)
+
+Download one archive `tl_<year>_<state fips>_<name>.zip` per state into `output_dir`
+(created if needed).
+
+When `state` is `nothing`, every state returned by `get_state_list()` is processed;
+otherwise `state` is resolved with `standardize_state_input`. Existing files are skipped
+unless `force` is `true`. A failed download is logged and does not stop the remaining
+states; all failures are summarised in a final warning.
+
+# Throws
+`ArgumentError` when `state` cannot be resolved.
+"""
+function download_shapefile(
+    geo::T;
+    state::Union{String, Integer, Nothing}=nothing,
+    output_dir::String=pwd(),
+    force::Bool=false) where T<:StateGeography
+
+    # Get states to process
+    if isnothing(state)
+        @warn "No state specified - downloading all states"
+        states_to_process = get_state_list()
+    else
+        state_info = standardize_state_input(state)
+        if isnothing(state_info)
+            throw(ArgumentError("Invalid state identifier provided"))
+        end
+        states_to_process = [state_info]
+    end
+
+    name = tiger_name(geo)
+    base_url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(name)/"
+
+    # Track failures
+    failed_downloads = String[]
+
+    # Process each state
+    for state_info in states_to_process
+        fips = state_info[2]
+        state_name = state_info[3]
+        filename = "tl_$(geo.year)_$(fips)_$(lowercase(name)).zip"
+        url = base_url * filename
+        output_path = joinpath(output_dir, filename)
+
+        if isfile(output_path) && !force
+            @info "File exists" state=state_name path=output_path
+            continue
+        end
+
+        try
+            @info "Downloading" state=state_name url=url
+            # Make sure the target directory exists, as the other scopes do
+            mkpath(output_dir)
+            Downloads.download(url, output_path)
+        catch e
+            push!(failed_downloads, state_name)
+            @error "Download failed" state=state_name exception=e
+            continue
+        end
+    end
+
+    if !isempty(failed_downloads)
+        @warn "Some downloads failed" failed_locations=failed_downloads
+    end
+end
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
+# County scope (Tracts, WaterAreas)
+"""
+    download_shapefile(geo::CountyGeography; state=nothing, county=nothing,
+                       output_dir=pwd(), force=false)
+
+Download one archive `tl_<year>_<state fips><county fips>_<name>.zip` per county into
+`output_dir` (created if needed).
+
+When `state` is `nothing`, every state returned by `get_state_list()` is processed;
+otherwise `state` is resolved with `standardize_state_input`. Within each state, all
+counties from `get_county_list` are downloaded unless `county` is given, in which case
+it is resolved with `standardize_county_input`. Existing files are skipped unless
+`force` is `true`. A failed download is logged and does not stop the remaining counties;
+all failures are summarised in a final warning.
+
+# Throws
+`ArgumentError` when `state` or `county` cannot be resolved.
+"""
+function download_shapefile(
+    geo::T;
+    state::Union{String, Integer, Nothing}=nothing,
+    county::Union{String, Integer, Nothing}=nothing,
+    output_dir::String=pwd(),
+    force::Bool=false) where {T <: CountyGeography}
+
+    # Get states to process
+    if isnothing(state)
+        @warn "No state specified - downloading all states"
+        states_to_process = get_state_list()
+    else
+        state_info = standardize_state_input(state)
+        if isnothing(state_info)
+            throw(ArgumentError("Invalid state identifier: $state"))
+        end
+        states_to_process = [state_info]
+    end
+
+    name = tiger_name(geo)
+    base_url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(name)/"
+
+    # Track failures
+    failed_downloads = String[]
+
+    for state_info in states_to_process
+        state_fips = state_info[2]
+        state_name = state_info[3]
+
+        if isnothing(county)
+            # Look counties up by the resolved FIPS of the state being processed, not by
+            # the caller's raw `state` argument (which may be `nothing`, a name or an Integer).
+            counties = get_county_list(state_fips)
+        else
+            requested = standardize_county_input(county, state_fips)
+            if isnothing(requested)
+                throw(ArgumentError("Invalid county identifier for $(state_name)"))
+            end
+            counties = [requested]
+        end
+
+        for entry in counties
+            county_fips = entry[3]
+            county_name = entry[4]
+
+            filename = "tl_$(geo.year)_$(state_fips)$(county_fips)_$(lowercase(name)).zip"
+            url = base_url * filename
+            output_path = joinpath(output_dir, filename)
+
+            if isfile(output_path) && !force
+                @info "File exists" state=state_name county=county_name path=output_path
+                continue
+            end
+
+            try
+                @info "Downloading" state=state_name county=county_name url=url
+                mkpath(output_dir)
+                Downloads.download(url, output_path)
+            catch e
+                push!(failed_downloads, "$(state_name) - $(county_name)")
+                @error "Download failed" state=state_name county=county_name exception=e
+                continue
+            end
+        end
+    end
+
+    if !isempty(failed_downloads)
+        @warn "Some downloads failed" failed_locations=failed_downloads
+    end
+end
+# --------------------------------------------------------------------------------------------------
diff --git a/src/geotypes.jl b/src/geotypes.jl
@@ -0,0 +1,55 @@
+# Abstract base type: root of every TIGER/Line geography declared below
+abstract type TigerGeography end
+
+# Abstract types for each scope (nation-wide, per-state and per-county downloads)
+abstract type NationalGeography <: TigerGeography end
+abstract type StateGeography <: TigerGeography end
+abstract type CountyGeography <: TigerGeography end
+
+# Concrete types with their metadata as constants.
+# Each struct stores only the data year; the TIGER directory name (used to build the
+# download URL) and a human-readable description live in the matching `*_META` tuple.
+struct State <: NationalGeography
+    year::Int
+end
+const STATE_META = (tiger_name = "STATE", description = "State Boundaries")
+
+struct County <: NationalGeography
+    year::Int
+end
+const COUNTY_META = (tiger_name = "COUNTY", description = "County Boundaries")
+
+struct CountySubdivision <: StateGeography
+    year::Int
+end
+const COUSUB_META = (tiger_name = "COUSUB", description = "County Subdivisions")
+
+# The Census Bureau publishes tract archives one per state
+# (`tl_<year>_<state fips>_tract.zip`), so tracts use the state scope rather than
+# the per-county file naming.
+struct Tract <: StateGeography
+    year::Int
+end
+const TRACT_META = (tiger_name = "TRACT", description = "Census Tracts")
+
+struct AreaWater <: CountyGeography
+    year::Int
+end
+const AREAWATER_META = (tiger_name = "AREAWATER", description = "Area Water")
+
+# Metadata accessors, grouped per geography type.
+# `tiger_name` returns the TIGER directory name, `description` the human-readable label.
+tiger_name(::Type{State}) = STATE_META.tiger_name
+description(::Type{State}) = STATE_META.description
+
+tiger_name(::Type{County}) = COUNTY_META.tiger_name
+description(::Type{County}) = COUNTY_META.description
+
+tiger_name(::Type{CountySubdivision}) = COUSUB_META.tiger_name
+description(::Type{CountySubdivision}) = COUSUB_META.description
+
+tiger_name(::Type{Tract}) = TRACT_META.tiger_name
+description(::Type{Tract}) = TRACT_META.description
+
+tiger_name(::Type{AreaWater}) = AREAWATER_META.tiger_name
+description(::Type{AreaWater}) = AREAWATER_META.description
+
+# Instance forms forward to the type-level methods above
+tiger_name(geo::TigerGeography) = tiger_name(typeof(geo))
+description(geo::TigerGeography) = description(typeof(geo))
+
+# Helper methods now just reference the type hierarchy: `scope` returns the abstract
+# scope type a geography belongs to. (The names `National`, `ByState` and `ByCounty`
+# that used to be returned here are not defined in this file, so calling `scope`
+# raised an `UndefVarError`.)
+scope(::Type{T}) where {T <: NationalGeography} = NationalGeography
+scope(::Type{T}) where {T <: StateGeography} = StateGeography
+scope(::Type{T}) where {T <: CountyGeography} = CountyGeography
+
+# Instance form, mirroring `tiger_name` and `description`
+scope(geo::TigerGeography) = scope(typeof(geo))
diff --git a/src/reference.jl b/src/reference.jl
@@ -0,0 +1,86 @@
+# Read the state reference file and return, for each data row, columns 1, 2 and 4 of
+# the `|`-separated line. Callers read element 2 as the FIPS code and element 3 as the
+# state name. The header row is dropped and duplicate rows are removed.
+function get_state_list()::Vector{Vector{String}}
+    state_file = get_reference_data()["state"]
+
+    # we do not need to load CSV so we parse every line (header included) by hand
+    rows = [String.(split(line, "|")[[1, 2, 4]]) for line in readlines(state_file)]
+
+    # remove the header, then deduplicate
+    return unique(rows[2:end])
+end
+
+# Takes a string input (handles names and abbreviations)
+# Resolve a textual state identifier to its entry in `get_state_list()`, or `nothing`.
+# The input is trimmed and compared case-insensitively against every field of each
+# entry. Purely numeric input is treated as a FIPS code and zero-padded by the Integer
+# method, so "6" and "06" resolve to the same state.
+function standardize_state_input(state_input::String)::Union{Vector{String}, Nothing}
+    normalized_input = uppercase(strip(state_input))
+
+    if !isempty(normalized_input) && all(isdigit, normalized_input)
+        # `tryparse` returns `nothing` on overflow; such input falls through and will
+        # simply not match anything below.
+        fips = tryparse(Int, normalized_input)
+        isnothing(fips) || return standardize_state_input(fips)
+    end
+
+    states = get_state_list()
+    matched_state = findfirst(
+        state -> any(uppercase(identifier) == normalized_input for identifier in state),
+        states)
+    return isnothing(matched_state) ? nothing : states[matched_state]
+end
+
+# Numeric input is a FIPS code: zero-pad it to two digits and return the first entry
+# of `get_state_list()` whose second field equals it, or `nothing` when none does.
+function standardize_state_input(fips::Integer)::Union{Vector{String}, Nothing}
+    code = lpad(string(fips), 2, '0')
+    for entry in get_state_list()
+        entry[2] == code && return entry
+    end
+    return nothing
+end
+
+# No state given: nothing to resolve
+standardize_state_input(::Nothing) = nothing
+
+
+# -------------------------------------------------------------------------------------------------
+
+# Read the county reference file and return, for each data row, columns 1, 2, 3 and 5
+# of the `|`-separated line (state abbreviation, state FIPS, county FIPS, county name).
+#
+# `state` selects the rows to return:
+# - `nothing`: every row, as read from the file
+# - an Integer or an all-digit string: rows whose state FIPS equals the value,
+#   zero-padded to two digits
+# - any other string: rows whose state abbreviation equals the value (case-insensitive)
+# Filtered results are deduplicated.
+function get_county_list(state=nothing)::Vector{Vector{AbstractString}}
+    county_file = get_reference_data()["county"]
+
+    # we do not need to load CSV so we read the file by hand; the first line is the header
+    data_lines = readlines(county_file)[2:end]
+    county_list = [String.(split(line, "|")[[1, 2, 3, 5]]) for line in data_lines]
+
+    isnothing(state) && return county_list
+
+    if state isa Integer
+        column, key = 2, lpad(string(state), 2, '0')
+    else
+        key = uppercase(strip(state))
+        if !isempty(key) && all(isdigit, key)
+            column, key = 2, lpad(key, 2, '0')   # then it is the FIPS
+        else
+            column = 1                           # then it is the state abbreviation
+        end
+    end
+
+    return unique(filter(row -> row[column] == key, county_list))
+end
+
+
+
+# Resolve a county identifier within the state `state_fips` via `find_county`.
+# Integers and all-digit strings are treated as county FIPS codes and zero-padded to
+# three digits (so 37, "37" and "037" are equivalent); any other string is trimmed and
+# upper-cased before the lookup. Returns the matching county entry or `nothing`.
+function standardize_county_input(
+    county_input::Union{String, Integer},
+    state_fips::String)::Union{Vector{String}, Nothing}
+
+    # Handle numeric input (FIPS code)
+    if county_input isa Integer
+        return find_county(lpad(string(county_input), 3, '0'), state_fips)
+    end
+
+    # Handle string input (name or FIPS)
+    normalized_input = uppercase(strip(county_input))
+    if !isempty(normalized_input) && all(isdigit, normalized_input)
+        normalized_input = lpad(normalized_input, 3, '0')
+    end
+    return find_county(normalized_input, state_fips)
+end
+
+
+# Return the first county of state `state_fips` whose county FIPS (field 3) or county
+# name (field 4, compared case-insensitively) equals `identifier`, or `nothing`.
+# The state abbreviation and state FIPS (fields 1 and 2) are deliberately not compared:
+# they are identical for every county of the state, so matching them would just return
+# the state's first county.
+function find_county(identifier::String, state_fips::String)::Union{Vector{String}, Nothing}
+    counties = get_county_list(state_fips)
+    target = uppercase(identifier)
+
+    matched_county = findfirst(
+        county -> county[3] == target || uppercase(county[4]) == target,
+        counties)
+
+    return isnothing(matched_county) ? nothing : counties[matched_county]
+end
diff --git a/test/UnitTests/assets.jl b/test/UnitTests/assets.jl
@@ -0,0 +1,66 @@
+# Checks that the reference-data artifact is installed and that the state and county
+# files it ships can be read through the package helpers.
+@testset "Asset Installation Tests" begin
+
+    @testset "Artifact Existence" begin
+        # Test that the Artifacts.toml file exists
+        artifact_toml = joinpath(pkgdir(TigerFetch), "Artifacts.toml")
+        @test isfile(artifact_toml)
+
+        # Test that the artifact can be installed from that same file
+        @test_nowarn ensure_artifact_installed("package_assets", artifact_toml)
+
+        # Test that the artifact path is valid
+        # (named so that it does not shadow `Pkg.Artifacts.artifact_path`)
+        installed_dir = TigerFetch.artifact_dir()
+        @test isdir(installed_dir)
+    end
+
+    @testset "Reference Data Files" begin
+        # Get reference data paths
+        data_paths = TigerFetch.get_reference_data()
+
+        @testset "County Data File" begin
+            county_path = data_paths["county"]
+            @test isfile(county_path)
+
+            # Test county file content structure
+            content = readlines(county_path)
+            @test length(content) > 0
+            @test occursin("|", first(content))
+            # `get_county_list` reads columns 1, 2, 3 and 5
+            first_line = split(first(content), "|")
+            @test length(first_line) >= 5
+        end
+
+        @testset "State Data File" begin
+            state_path = data_paths["state"]
+            @test isfile(state_path)
+
+            # Test state file content structure
+            content = readlines(state_path)
+            @test length(content) > 0
+            @test occursin("|", first(content))
+            # `get_state_list` reads columns 1, 2 and 4
+            first_line = split(first(content), "|")
+            @test length(first_line) >= 4
+        end
+    end
+
+    @testset "Data Accessibility" begin
+        # Test state list functionality
+        state_list = TigerFetch.get_state_list()
+        @test length(state_list) > 0
+        @test all(x -> length(x) == 3, state_list) # Each state should have 3 identifiers
+
+        # Test county list functionality
+        county_list = TigerFetch.get_county_list()
+        @test length(county_list) > 0
+        @test all(x -> length(x) == 4, county_list) # Each county should have 4 identifiers
+
+        # Test specific state county list
+        al_counties = TigerFetch.get_county_list("AL")
+        @test length(al_counties) > 0
+        @test all(x -> x[1] == "AL", al_counties) # All counties should be from Alabama
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,6 +1,25 @@
+# --------------------------------------------------------------------------------------------------
using TigerFetch
using Test
+using Pkg.Artifacts
+# using LazyArtifacts
-@testset "TigerFetch.jl" begin
- # Write your tests here.
+
+const testsuite = [ # test files under UnitTests/ to include, named without the .jl extension
+ "assets",
+]
+
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
+printstyled("Running tests:\n", color=:blue, bold=true)
+
+@testset verbose=true "TigerFetch.jl" begin
+    for test in testsuite
+        println("\033[1m\033[32m → RUNNING\033[0m: $(test)")
+        include("UnitTests/$(test).jl")
+        # Failing `@test`s are recorded by the enclosing testset rather than thrown, so
+        # reaching this line only means the file ran to completion, not that it passed;
+        # the testset summary reports the actual results.
+        println("\033[1m\033[32m FINISHED\033[0m")
+    end
end
+# --------------------------------------------------------------------------------------------------