2 Import the Data

2.1 Dependencies

# Load needed modules
box::use(
    dplyr[full_join, glimpse, select],
    janitor[clean_names],
    magrittr[`%>%`],
    readxl[read_xlsx],
    sf[st_set_geometry],
    sids_data_wrangling = ./modules/sids_data_wrangling,
    tibble[as_tibble]
)

2.2 Initial import

First, read in the excel file that was originally shared for this project:

raw <- 
    # Parse excel file
    read_xlsx("data/finaldataforanalysis3_220121.xlsx") %>%
    # Clean up variability in naming conventions
    clean_names()

2.3 Join census tract populations

Then, import census population data:

## This is a custom function I wrote that 
## pulls data from the TidyCensus API about
## the population count of people under five 
## years old and about spatial features 
## for each census tract. I have commented it 
## out and saved the result in an RDS file
## so as to not make a new call to the API 
## every time this script is run. You can
## inspect the function definition in the 
## modules folder of the source code.

# coords_and_pop_est <- 
#     sids_data_wrangling$get_coords_and_pop_est(raw)
# 
# saveRDS(coords_and_pop_est, "data/coords_and_pop_est.RDS")

coords_and_pop_est <- readRDS("data/coords_and_pop_est.RDS")

# Join the population counts to the imported dataframe
df <- 
    coords_and_pop_est %>%
    # Drop geospatial features
    st_set_geometry(NULL) %>%
    # Convert to tibble format
    as_tibble() %>%
    # And join to raw
    full_join(raw)
#> Joining, by = "fips"

# Preview the data
glimpse(df)
#> Rows: 1,315
#> Columns: 32
#> $ fips                                 <dbl> 17031807500, …
#> $ pop_under_five                       <dbl> 151, 192, 21,…
#> $ count_asphyxia                       <dbl> 0, 0, 1, 0, 0…
#> $ count_opioid_death                   <dbl> 1, 7, 2, 2, 6…
#> $ svi_socioeconomic                    <dbl> 0.1269, 0.593…
#> $ svi_household_composition_disability <dbl> 0.1728, 0.803…
#> $ svi_minority_language                <dbl> 0.7024, 0.677…
#> $ svi_housing_transportation           <dbl> 0.3690, 0.528…
#> $ svi_summary_ranking                  <dbl> 0.2470, 0.679…
#> $ pe_foreignborn                       <dbl> 31.6, 2.0, 1.…
#> $ pe_marriedmales                      <dbl> 62.5, 23.0, 3…
#> $ pe_marriedfemales                    <dbl> 56.6, 23.0, 2…
#> $ pedivorcewidowedmale                 <dbl> 6.4, 16.9, 7.…
#> $ pedivorcewidowedfemale               <dbl> 16.8, 34.7, 3…
#> $ pelessthanhighschool                 <dbl> 7.1, 9.2, 8.0…
#> $ highschooldiploma                    <dbl> 14.6, 28.4, 2…
#> $ somecollege                          <dbl> 12.8, 26.4, 3…
#> $ collegediploma                       <dbl> 65.5, 36.0, 3…
#> $ black                                <dbl> 2.5, 97.4, 96…
#> $ white                                <dbl> 58.3, 0.7, 1.…
#> $ hispanic                             <dbl> 5.6, 0.0, 2.2…
#> $ male                                 <dbl> 48.8, 50.8, 3…
#> $ percent_enployed                     <dbl> 61.6, 49.0, 4…
#> $ incomelt10                           <dbl> 0.0, 15.7, 10…
#> $ incomelt25                           <dbl> 3.6, 15.6, 22…
#> $ incomelt50                           <dbl> 10.9, 15.9, 2…
#> $ incomelt75                           <dbl> 15.7, 27.6, 1…
#> $ incomegt75                           <dbl> 69.8, 25.3, 2…
#> $ privateinsurance                     <dbl> 78.9, 55.5, 5…
#> $ publicinsurance                      <dbl> 26.4, 43.5, 5…
#> $ noinsurance                          <dbl> 2.8, 12.2, 13…
#> $ spanish_language                     <dbl> 6.0, 2.1, 0.7…

2.4 Save for use in other chapters

saveRDS(df, file = "data/df.RDS")