TigerFetch.jl

Download TIGER/Line shapefiles from the US Census Bureau
Log | Files | Refs | README | LICENSE

commit 47116de2b63fc72e696d3462e732991c4adfdd30
parent c5a5c8172b0f1905f7754468bbe2e229e08da6e9
Author: Erik Loualiche <[email protected]>
Date:   Sun, 23 Feb 2025 20:39:56 -0600

Slowly filling in all available shapes.

Diffstat:
M .github/workflows/CI.yml | 29 ++++++++++++++++-------------
M README.md | 8 ++++++--
M src/download.jl | 57 ++++++++++++++++++++++++++++++++++++++++-----------------
M src/geotypes.jl | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
M src/main.jl | 9 +++++++++
A test/UnitTests/downloads.jl | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/runtests.jl | 5 +++--
7 files changed, 269 insertions(+), 38 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml @@ -3,9 +3,8 @@ on: push: branches: - main - tags: ['*'] + tags: ["*"] pull_request: - workflow_dispatch: concurrency: # Skip intermediate builds: always. # Cancel intermediate builds: only if it is a pull request build. @@ -13,29 +12,33 @@ concurrency: cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} jobs: test: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} - timeout-minutes: 60 - permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created - actions: write - contents: read + env: + WRDS_USERNAME: ${{ secrets.WRDS_USERNAME }} + WRDS_PWD: ${{ secrets.WRDS_PWD }} strategy: fail-fast: false matrix: version: - - '1.11' - - '1.6' - - 'pre' + - "1" + - "1.11" os: - ubuntu-latest arch: - x64 steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: julia-actions/cache@v2 + - uses: julia-actions/cache@v1 - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} # required + fail_ci_if_error: false + file: lcov.info diff --git a/README.md b/README.md @@ -6,8 +6,8 @@ Install the command line tool (you need a julia installation for this) ```bash mkdir -p ~/.local/share/julia # or some other directory -git clone git@github.com:eloualiche/TigerFetch.jl.git ~/.local/share/julia -julia --project deps/build.jl install +git clone git@github.com:eloualiche/TigerFetch.jl.git ~/.local/share/julia +cd ~/.local/share/julia && julia --project deps/build.jl install ``` The binary will be available at 
`~/.julia/bin/tigerfetch` but also depends on the downloaded packages. @@ -42,3 +42,7 @@ You can use it ~/.julia/bin/tigerfetch areawater --state "Minnesota" --county "Hennepin" --output tmp # works ``` + +#### Julia package + +Look at the test suite (specifically `UnitTests/downloads.jl`) for now diff --git a/src/download.jl b/src/download.jl @@ -49,33 +49,56 @@ function download_shapefile( else @warn "No state specified - downloading all states" states_to_process = get_state_list() + + # There are some exceptions because not everything is available all the time! + (geo isa CountySubdivision) ? filter!(s -> s[2] != "74", states_to_process) : nothing + end # Use the type of geo to get tiger_name geo_type = typeof(geo) base_url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(tiger_name(geo_type))/" - # Process each state - for state_info in states_to_process - fips = state_info[2] - state_name = state_info[3] - filename = "tl_$(geo.year)_$(fips)_$(lowercase(tiger_name(geo_type))).zip" - url = base_url * filename - output_path = joinpath(output_dir, filename) + try + # Process each state with total interrupt by user ... 
+ for state_info in states_to_process + fips = state_info[2] + state_name = state_info[3] + filename = "tl_$(geo.year)_$(fips)_$(lowercase(tiger_name(T))).zip" + url = base_url * filename + output_path = joinpath(output_dir, filename) - if isfile(output_path) && !force - @info "File exists" state=state_name path=output_path - continue - end + if isfile(output_path) && !force + @info "File exists" state=state_name path=output_path + continue + end - try - @info "Downloading" state=state_name url=url - Downloads.download(url, output_path) - catch e - @error "Download failed" state=state_name exception=e - continue + try + @info "Downloading" state=state_name url=url + Downloads.download(url, output_path) + catch e + if e isa InterruptException + # Re-throw interrupt to be caught by outer try block + rethrow(e) + end + @error "Download failed" state=state_name exception=e + continue + end end + catch e + if e isa InterruptException + @info "Download process interrupted by user" + # Optional: Clean up partially downloaded file + try + isfile(output_path) && rm(output_path) + catch + # Ignore cleanup errors + end + rethrow(e) # This will exit the function + end + rethrow(e) # Re-throw any other unexpected errors end + end # -------------------------------------------------------------------------------------------------- diff --git a/src/geotypes.jl b/src/geotypes.jl @@ -1,3 +1,5 @@ + +# -------------------------------------------------------------------------------------------------- # Abstract base type abstract type TigerGeography end @@ -5,7 +7,10 @@ abstract type TigerGeography end abstract type NationalGeography <: TigerGeography end abstract type StateGeography <: TigerGeography end abstract type CountyGeography <: TigerGeography end +# -------------------------------------------------------------------------------------------------- + +# -------------------------------------------------------------------------------------------------- # Concrete types with their 
metadata as constants struct State <: NationalGeography year::Int @@ -17,39 +22,112 @@ struct County <: NationalGeography end const COUNTY_META = (tiger_name = "COUNTY", description = "County Boundaries") +struct ZipCode <: NationalGeography + year::Int +end +const ZIP_META = (tiger_name = "ZCTA520", description = "2020 5-Digit ZIP Code Tabulation Area") + +struct UrbanArea <: NationalGeography + year::Int +end +const URBANAREA_META = (tiger_name = "UAC20", description = "2020 Urban Area/Urban Cluster") + +struct PrimaryRoads <: NationalGeography + year::Int +end +const PRIMARYROADS_META = (tiger_name = "PRIMARYROADS", description = "Primary Roads") + +struct CBSA <: NationalGeography + year::Int +end +const CBSA_META = (tiger_name = "CBSA", description = "Core Based Statistical Area") + +struct METDIV <: NationalGeography + year::Int +end +const METDIV_META = (tiger_name = "METDIV", description = "Metropolitan Division") +# -------------------------------------------------------------------------------------------------- + + +# -------------------------------------------------------------------------------------------------- struct CountySubdivision <: StateGeography year::Int end -const COUSUB_META = (tiger_name = "COUSUB", description = "County Subdivisions") +const COUSUB_META = (tiger_name = "COUSUB", description = "County Subdivision") + +struct Tract <: StateGeography + year::Int +end +const TRACT_META = (tiger_name = "TRACT", description = "Census Tract") -struct Tract <: CountyGeography +struct PrimarySecondaryRoads <: StateGeography year::Int end -const TRACT_META = (tiger_name = "TRACT", description = "Census Tracts") +const PSROADS_META = (tiger_name = "PRISECROADS", description = "Primary and Secondary Roads") +# -------------------------------------------------------------------------------------------------- + +# -------------------------------------------------------------------------------------------------- +# --- county geographies struct 
AreaWater <: CountyGeography year::Int end -const AREAWATER_META = (tiger_name = "AREAWATER", description = "Area Water") +const AREAWATER_META = (tiger_name = "AREAWATER", description = "Area Hydrography") + +struct LinearWater <: CountyGeography + year::Int +end +const LINEARWATER_META = (tiger_name = "LINEARWATER", description = "Linear Hydrography") +struct Roads <: CountyGeography + year::Int +end +const ROADS_META = (tiger_name = "ROADS", description = "Roads") +# -------------------------------------------------------------------------------------------------- + + +# -------------------------------------------------------------------------------------------------- # Helper methods to access metadata tiger_name(::Type{State}) = STATE_META.tiger_name tiger_name(::Type{County}) = COUNTY_META.tiger_name +tiger_name(::Type{ZipCode}) = ZIP_META.tiger_name +tiger_name(::Type{UrbanArea}) = URBANAREA_META.tiger_name +tiger_name(::Type{PrimaryRoads}) = PRIMARYROADS_META.tiger_name +tiger_name(::Type{CBSA}) = CBSA_META.tiger_name +tiger_name(::Type{METDIV}) = METDIV_META.tiger_name + tiger_name(::Type{CountySubdivision}) = COUSUB_META.tiger_name tiger_name(::Type{Tract}) = TRACT_META.tiger_name +tiger_name(::Type{PrimarySecondaryRoads}) = PSROADS_META.tiger_name + tiger_name(::Type{AreaWater}) = AREAWATER_META.tiger_name +tiger_name(::Type{LinearWater}) = LINEARWATER_META.tiger_name +tiger_name(::Type{Roads}) = ROADS_META.tiger_name tiger_name(x::T) where T <: TigerGeography = tiger_name(T) +# -- description description(::Type{State}) = STATE_META.description description(::Type{County}) = COUNTY_META.description +description(::Type{ZipCode}) = ZIP_META.description +description(::Type{UrbanArea}) = URBANAREA_META.description +description(::Type{PrimaryRoads}) = PRIMARYROADS_META.description +description(::Type{CBSA}) = CBSA_META.description +description(::Type{METDIV}) = METDIV_META.description + description(::Type{CountySubdivision}) = COUSUB_META.description 
description(::Type{Tract}) = TRACT_META.description +description(::Type{PrimarySecondaryRoads}) = PSROADS_META.description + description(::Type{AreaWater}) = AREAWATER_META.description +description(::Type{LinearWater}) = LINEARWATER_META.description +description(::Type{Roads}) = ROADS_META.description description(x::T) where T <: TigerGeography = description(T) +# -- # Helper methods now just reference the type hierarchy scope(::Type{T}) where {T <: NationalGeography} = National scope(::Type{T}) where {T <: StateGeography} = ByState scope(::Type{T}) where {T <: CountyGeography} = ByCounty +# --------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/src/main.jl b/src/main.jl @@ -3,9 +3,18 @@ const GEOGRAPHY_TYPES = Dict( "state" => State, "county" => County, + "zipcode" => ZipCode, + "urbanarea" => UrbanArea, + "primaryroads" => PrimaryRoads, + "cousub" => CountySubdivision, "tract" => Tract, + "primarysecondaryroads" => PrimarySecondaryRoads, + "areawater" => AreaWater, + "linearwater" => LinearWater, + "road" => Roads, + ) # julia function diff --git a/test/UnitTests/downloads.jl b/test/UnitTests/downloads.jl @@ -0,0 +1,111 @@ +@testset "Download Tests" begin + + +# -------------------------------------------------------------------------------------------------- + @testset "National Level Downloads" begin + + test_dir = mktempdir() + + # Download the states shapefiles + tigerdownload("state", 2024; state="MN", county="", output=test_dir, force=true) + state_file_download = joinpath(test_dir, "tl_2024_us_state.zip") + # stat(state_file_download) + @test bytes2hex(SHA.sha256(read(state_file_download))) == + "e30bad8922b177b5991bf8606d3d95de8f5f0b4bab25848648de53b25f72c17f" + + tigerdownload("county", 2024; state="MN", county="Hennepin", output=test_dir, force=true) + county_file_download = joinpath(test_dir, "tl_2024_us_county.zip") + # stat(county_file_download) + @test 
bytes2hex(SHA.sha256(read(county_file_download))) == + "a344b72be48f2448df1ae1757098d94571b96556d3b9253cf9d6ee77bce8a0b4" + + # -- still to test zcta520, urban area, cbsa + + + end +# -------------------------------------------------------------------------------------------------- + + +# -------------------------------------------------------------------------------------------------- + @testset "State Level Downloads" begin + + test_dir = mktempdir() + + # Download the county subdivisions shapefiles + tigerdownload("cousub", 2024; state="MN", county="", output=test_dir, force=true) + cousub_file_download = joinpath(test_dir, "tl_2024_27_cousub.zip") + # stat(cousub_file_download) + @test bytes2hex(SHA.sha256(read(cousub_file_download))) == + "b1cf4855fe102d9ebc34e165457986b8d906052868da0079ea650d39d973ec98" + + # for all the states ... + tigerdownload("cousub", 2024; output=test_dir, force=false) + cousub_file_list = [ "tl_2024_$(x[2])_cousub.zip" + for x in TigerFetch.get_state_list() ] + cousub_file_list = joinpath.(test_dir, cousub_file_list) + @test !all(isfile.(cousub_file_list)) # there should be one missing file + @test all(.!isfile.(filter(contains("tl_2024_74_cousub.zip"), cousub_file_list))) # there should be one missing file + + cousub_file_download = filter(contains("tl_2024_28_cousub.zip"), cousub_file_list)[1] + round(stat(cousub_file_download).size / 1024, digits=2) + @test bytes2hex(SHA.sha256(read(cousub_file_download))) == + "f91963513bf14f64267fefc5ffda24161e879bfb76a48c19517eba0f85c638ba" + + # -- tracts + tigerdownload("tract", 2024; state="27", county="", output=test_dir, force=true) + tract_file_download = joinpath(test_dir, "tl_2024_27_tract.zip") + round(stat(tract_file_download).size / 1024, digits=2) + @test bytes2hex(SHA.sha256(read(tract_file_download))) == + "83f784b2042d0af55723baaac37b2b29840d1485ac233b3bb73d6af4ec7246eb" + + # -- roads + tigerdownload("primarysecondaryroads", 2024; state="27", county="", output=test_dir, 
force=true) + road_file_download = joinpath(test_dir, "tl_2024_27_prisecroads.zip") + round(stat(road_file_download).size / 1024, digits=2) + @test bytes2hex(SHA.sha256(read(road_file_download))) == + "3c06a9b03ca06abf42db85b3b9ab3110d251d54ccf3d59335a2e5b98d2e6f52a" + + + + end +# -------------------------------------------------------------------------------------------------- + + +# -------------------------------------------------------------------------------------------------- + @testset "County Level Downloads" begin + + test_dir = mktempdir() + + # Download the areawater shapefiles + tigerdownload("areawater", 2024; state="MN", county="Hennepin", output=test_dir, force=true) + areawater_file_download = joinpath(test_dir, "tl_2024_27053_areawater.zip") + # stat(cousub_file_download) + @test bytes2hex(SHA.sha256(read(areawater_file_download))) == + "54a2825f26405fbb83bd4c5c7a96190867437bc46dc0d4a8155198890d63db54" + + # Download the linear water shapefiles for all of Michigan + tigerdownload("linearwater", 2024; state="MI", output=test_dir, force=true) + linearwater_file_list = [ "tl_2024_$(x[2])$(x[3])_linearwater.zip" + for x in TigerFetch.get_county_list("MI") ] + linearwater_file_list = joinpath.(test_dir, linearwater_file_list) + @test all(isfile.(linearwater_file_list)) # test that all the files are there + + linearwater_file_download = filter(contains("tl_2024_26089_linearwater.zip"), linearwater_file_list)[1] + round(stat(linearwater_file_download).size / 1024, digits=2) + @test bytes2hex(SHA.sha256(read(linearwater_file_download))) == + "b05a58ddb37abdc9287c533a6f87110ef4b153dc4fbd20833d3d1cf56470cba7" + + # roads + tigerdownload("road", 2024; state="MN", county="Hennepin", output=test_dir, force=true) + roads_file_download = joinpath(test_dir, "tl_2024_27053_roads.zip") + round(stat(roads_file_download).size / 1024, digits=2) + @test bytes2hex(SHA.sha256(read(roads_file_download))) == + 
"b828ad38a8bc3cd3299efcc7e3b333ec2954229392eb254a460e596c1db78511" + + + + end +# -------------------------------------------------------------------------------------------------- + + +end+ \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl @@ -1,14 +1,15 @@ # -------------------------------------------------------------------------------------------------- using TigerFetch using Test -using Pkg.Artifacts +using Pkg.Artifacts using SHA -# using LazyArtifacts + const testsuite = [ "assets", + "downloads", ] # --------------------------------------------------------------------------------------------------