12 Additional Feature Engineering
12.1 Demographic Features
library('tidyverse')
#> -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
#> v ggplot2 3.3.3 v purrr 0.3.4
#> v tibble 3.1.2 v dplyr 1.0.6
#> v tidyr 1.1.3 v stringr 1.4.0
#> v readr 1.4.0 v forcats 0.5.1
#> -- Conflicts ------------------------------------------ tidyverse_conflicts() --
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag() masks stats::lag()
source('C:/Users/jkyle/Documents/GitHub/Jeff_Data_Wrangling/Functions/connect_help.R')
#> Warning: `src_sqlite()` was deprecated in dplyr 1.0.0.
#> Please use `tbl()` directly with a database connection
NHANES_Tables#> [1] "ACQ" "ALB_CR" "ALQ" "ALQY" "AUQ" "BIOPRO"
#> [7] "BMX" "BPQ" "BPX" "CBC" "CDQ" "CMV"
#> [13] "COT" "CRCO" "DBQ" "DEMO" "DEQ" "DIQ"
#> [19] "DLQ" "DPQ" "DR1IFF" "DR1TOT" "DR2IFF" "DR2TOT"
#> [25] "DRXFCD" "DS1IDS" "DS1TOT" "DS2TOT" "DSBI" "DSII"
#> [31] "DSQIDS" "DSQTOT" "DUQ" "DXX" "DXXFEM" "DXXSPN"
#> [37] "Ds2IDS" "ECQ" "FASTQX" "FERTIN" "FETIB" "FOLATE"
#> [43] "FOLFMS" "GHB" "GLU" "HDL" "HEPA" "HEPBD"
#> [49] "HEPC" "HEPE" "HEQ" "HIQ" "HIV" "HOQ"
#> [55] "HSCRP" "HSQ" "HSV" "HUQ" "IHGEM" "IMQ"
#> [61] "INS" "KIQ" "LUX" "MCQ" "OCQ" "OHQ"
#> [67] "OHXDEN" "OHXREF" "OSQ" "PAQ" "PAQY" "PBCD"
#> [73] "PFAS" "PFQ" "PUQMEC" "RHQ" "RXQASA" "RXQ_DRUG"
#> [79] "RXQ_RX" "SLQ" "SMQ" "SMQFAM" "SMQRTU" "SMQSHS"
#> [85] "SSPFAS" "SXQ" "TCHOL" "TFR" "TRIGLY" "UCFLOW"
#> [91] "UCM" "UCPREG" "UHG" "UIO" "UNI" "VIC"
#> [97] "VOCWB" "VTQ"
\(~\)
\(~\)
# Open_NHANES_table_help('DEMO')
12.1.1 Age & Gender
<- DEMO %>%
AGE_GENDER_TBL mutate(Gender = if_else(RIAGENDR == 2 , "Female", "Male")) %>%
mutate(Age = if_else(is.na(RIDAGEYR), RIDAGEMN/12 , RIDAGEYR)) %>%
select(SEQN, Age, Gender)
%>%
AGE_GENDER_TBL glimpse()
#> Rows: ??
#> Columns: 3
#> Database: sqlite 3.35.5 [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> $ SEQN <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ~
#> $ Age <dbl> 2, 77, 10, 1, 49, 19, 59, 13, 11, 43, 15, 37, 70, 81, 38, 85, 2~
#> $ Gender <chr> "Female", "Male", "Female", "Male", "Male", "Female", "Female",~
12.1.2 Race/Hispanic origin
We see that only RIDRETH1
is available in both 2017-2018
and 1999-2000
:
<- DEMO %>%
RACE_TBL select(SEQN, RIDRETH1) %>%
mutate(Race =
case_when(RIDRETH1 == 1 ~ "Mexican American",
== 2 ~ "Other Hispanic",
RIDRETH1 == 3 ~ "White",
RIDRETH1 == 4 ~ "Black",
RIDRETH1 == 5 ~ "Other",
RIDRETH1 !(RIDRETH1 %in% c(1,2,3,4,5)) ~ NA )) %>%
select(-RIDRETH1)
%>%
RACE_TBL head()
#> # Source: lazy query [?? x 2]
#> # Database: sqlite 3.35.5
#> # [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> SEQN Race
#> <dbl> <chr>
#> 1 1 Black
#> 2 2 White
#> 3 3 White
#> 4 4 Black
#> 5 5 White
#> 6 6 Other
12.1.3 US Armed Forces
We see that only DMQMILIZ
is available since 2011-2012
, prior to 2011, the veteran status information (released in the variable DMQMILIT
in the demographics file in 1999-2010
)
<- DEMO %>%
USFA_TBL select(SEQN, DMQMILIZ, DMQMILIT) %>%
mutate(USAF =
case_when((DMQMILIZ == 1 | DMQMILIT == 1) ~ "Yes",
== 2 | DMQMILIT == 2) ~ "No",
(DMQMILIZ !(DMQMILIZ %in% c(1,2) | DMQMILIT %in% c(1,2) ) |(is.na(DMQMILIZ) & is.na(DMQMILIT)) ~ NA )) %>%
select(-DMQMILIZ, -DMQMILIT)
%>%
USFA_TBL head()
#> # Source: lazy query [?? x 2]
#> # Database: sqlite 3.35.5
#> # [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> SEQN USAF
#> <dbl> <chr>
#> 1 1 <NA>
#> 2 2 Yes
#> 3 3 <NA>
#> 4 4 <NA>
#> 5 5 Yes
#> 6 6 No
12.1.4 Country of Birth
Due to the concerns of disclosure risk, starting in 2011, country of birth was recoded into two categories: 1) Born in 50 U.S. states or Washington, DC; and 2) Born in other countries, including U.S. territories.
<- DEMO %>%
BIRTH_COUNTRY_TBL select(SEQN, DMDBORN, DMDBORN2, DMDBORN4 ) %>%
mutate(Birth_Country =
case_when(DMDBORN == 1 | DMDBORN2 == 1 | DMDBORN4 == 1 ~ "USA",
== 2 | DMDBORN2 == 2 | DMDBORN4 == 2 ~ "Other",
DMDBORN > 2 | DMDBORN2 > 2 | DMDBORN4 > 2 ) |
(DMDBORN is.na(DMDBORN) & is.na(DMDBORN2) & is.na(DMDBORN4)) ~ NA )) %>%
(select(SEQN, Birth_Country)
%>%
BIRTH_COUNTRY_TBL head()
#> # Source: lazy query [?? x 2]
#> # Database: sqlite 3.35.5
#> # [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> SEQN Birth_Country
#> <dbl> <chr>
#> 1 1 USA
#> 2 2 USA
#> 3 3 <NA>
#> 4 4 USA
#> 5 5 USA
#> 6 6 USA
12.1.5 Education
<- DEMO %>%
EDUCATION_TBL select(SEQN, DMDEDUC3, DMDEDUC2) %>%
mutate(Grade_Level =
case_when(DMDEDUC3 ==0~"Never attended / kindergarten only",
==1~"1st grade",
DMDEDUC3 ==2~"2nd grade",
DMDEDUC3 ==3~"3rd grade",
DMDEDUC3 ==4~"4th grade",
DMDEDUC3 ==5~"5th grade",
DMDEDUC3 ==6~"6th grade",
DMDEDUC3 ==7~"7th grade",
DMDEDUC3 ==8~"8th grade",
DMDEDUC3 ==9~"9th grade",
DMDEDUC3 ==10~"10th grade",
DMDEDUC3 ==11~"11th grade",
DMDEDUC3 ==12~"12th grade, no diploma",
DMDEDUC3 ==13~"High school graduate",
DMDEDUC3 ==14~"GED or equivalent",
DMDEDUC3 ==15~"More than high school",
DMDEDUC3 ==55~"Less than 5th grade",
DMDEDUC3 ==66~"Less than 9th grade",
DMDEDUC3 ==77~"Refused",
DMDEDUC3 ==99~"Don't Know",
DMDEDUC3 is.na(DMDEDUC3) | DMDEDUC3 > 66 ~ NA)) %>%
select(-DMDEDUC3) %>%
mutate(Grade_Range =
case_when(DMDEDUC2 ==1~"Less than 9th grade",
==2~"9-11th grade (Includes 12th grade with no diploma)",
DMDEDUC2 ==3~"High school graduate/GED or equivalent",
DMDEDUC2 ==4~"Some college or AA degree",
DMDEDUC2 ==5~"College graduate or above",
DMDEDUC2 > 5 | is.na(DMDEDUC2) ~ NA )) %>%
DMDEDUC2 select(-DMDEDUC2)
%>%
EDUCATION_TBL head()
#> # Source: lazy query [?? x 3]
#> # Database: sqlite 3.35.5
#> # [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> SEQN Grade_Level Grade_Range
#> <dbl> <chr> <chr>
#> 1 1 <NA> <NA>
#> 2 2 <NA> College graduate or above
#> 3 3 3rd grade <NA>
#> 4 4 <NA> <NA>
#> 5 5 <NA> College graduate or above
#> 6 6 More than high school <NA>
12.1.6 Marital Status
<- DEMO %>%
MARITAL_STATUS_TBL select(SEQN, DMDMARTL) %>%
mutate(Marital_Status =
case_when(DMDMARTL ==1~"Married",
==2~"Widowed",
DMDMARTL ==3~"Divorced",
DMDMARTL ==4~"Separated",
DMDMARTL ==5~"Never married",
DMDMARTL ==6~"Living with partner",
DMDMARTL >6 | is.na(DMDMARTL) ~ NA )) %>%
DMDMARTL select(-DMDMARTL)
%>%
MARITAL_STATUS_TBL head()
#> # Source: lazy query [?? x 2]
#> # Database: sqlite 3.35.5
#> # [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> SEQN Marital_Status
#> <dbl> <chr>
#> 1 1 <NA>
#> 2 2 <NA>
#> 3 3 <NA>
#> 4 4 <NA>
#> 5 5 Married
#> 6 6 Never married
12.1.7 Pregnancy Status
<- DEMO %>%
PREG_STATUS_TBL select(SEQN, RIDEXPRG) %>%
mutate(Pregnant =
case_when(RIDEXPRG == 1 ~ "Pregnant",
== 2 ~ "Not Pregnant",
RIDEXPRG > 2 | is.na(RIDEXPRG) ~ NA )) %>%
RIDEXPRG select(-RIDEXPRG)
%>%
PREG_STATUS_TBL head()
#> # Source: lazy query [?? x 2]
#> # Database: sqlite 3.35.5
#> # [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> SEQN Pregnant
#> <dbl> <chr>
#> 1 1 <NA>
#> 2 2 <NA>
#> 3 3 <NA>
#> 4 4 <NA>
#> 5 5 <NA>
#> 6 6 Not Pregnant
12.1.8 Income
<- DEMO %>%
INCOME_TBL select(SEQN, INDHHIN2, INDFMIN2, INDFMPIR) %>%
mutate(Household_Icome = case_when(INDHHIN2 ==1~"$ 0 to $ 4,999",
==2~"$ 5,000 to $ 9,999",
INDHHIN2 ==3~"$10,000 to $14,999",
INDHHIN2 ==4~"$15,000 to $19,999",
INDHHIN2 ==5~"$20,000 to $24,999",
INDHHIN2 ==6~"$25,000 to $34,999",
INDHHIN2 ==7~"$35,000 to $44,999",
INDHHIN2 ==8~"$45,000 to $54,999",
INDHHIN2 ==9~"$55,000 to $64,999",
INDHHIN2 ==10~"$65,000 to $74,999",
INDHHIN2 ==12~"$20,000 and Over",
INDHHIN2 ==13~"Under $20,000",
INDHHIN2 ==14~"$75,000 to $99,999",
INDHHIN2 ==15~"$100,000 and Over",
INDHHIN2 > 15 | is.na(INDHHIN2) ~ NA)) %>%
INDHHIN2 select(-INDHHIN2) %>%
mutate(Family_Income = case_when(INDFMIN2 ==1~"$ 0 to $ 4,999",
==2~"$ 5,000 to $ 9,999",
INDFMIN2 ==3~"$10,000 to $14,999",
INDFMIN2 ==4~"$15,000 to $19,999",
INDFMIN2 ==5~"$20,000 to $24,999",
INDFMIN2 ==6~"$25,000 to $34,999",
INDFMIN2 ==7~"$35,000 to $44,999",
INDFMIN2 ==8~"$45,000 to $54,999",
INDFMIN2 ==9~"$55,000 to $64,999",
INDFMIN2 ==10~"$65,000 to $74,999",
INDFMIN2 ==12~"$20,000 and Over",
INDFMIN2 ==13~"Under $20,000",
INDFMIN2 ==14~"$75,000 to $99,999",
INDFMIN2 ==15~"$100,000 and Over",
INDFMIN2 > 15 | is.na(INDFMIN2) ~ NA )) %>%
INDFMIN2 select(-INDFMIN2) %>%
mutate(Poverty_Income_Ratio =
case_when(INDFMPIR <= 5 ~ INDFMPIR,
is.na(INDFMPIR) ~ NA)) %>%
select(-INDFMPIR)
%>%
INCOME_TBL head()
#> # Source: lazy query [?? x 4]
#> # Database: sqlite 3.35.5
#> # [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> SEQN Household_Icome Family_Income Poverty_Income_Ratio
#> <dbl> <lgl> <lgl> <dbl>
#> 1 1 NA NA 0.86
#> 2 2 NA NA 5
#> 3 3 NA NA 1.47
#> 4 4 NA NA 0.57
#> 5 5 NA NA 5
#> 6 6 NA NA 1.21
12.1.9 Demographic Features Final
<- DEMO %>%
DEMO_FEATURES select(SEQN) %>%
left_join(AGE_GENDER_TBL) %>%
left_join(RACE_TBL) %>%
left_join(USFA_TBL) %>%
left_join(BIRTH_COUNTRY_TBL) %>%
left_join(EDUCATION_TBL) %>%
left_join(MARITAL_STATUS_TBL) %>%
left_join(PREG_STATUS_TBL) %>%
left_join(INCOME_TBL)
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
%>%
DEMO_FEATURES glimpse()
#> Rows: ??
#> Columns: 13
#> Database: sqlite 3.35.5 [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> $ SEQN <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15~
#> $ Age <dbl> 2, 77, 10, 1, 49, 19, 59, 13, 11, 43, 15, 37, 70,~
#> $ Gender <chr> "Female", "Male", "Female", "Male", "Male", "Fema~
#> $ Race <chr> "Black", "White", "White", "Black", "White", "Oth~
#> $ USAF <chr> NA, "Yes", NA, NA, "Yes", "No", "No", NA, NA, "Ye~
#> $ Birth_Country <chr> "USA", "USA", NA, "USA", "USA", "USA", "USA", "US~
#> $ Grade_Level <chr> NA, NA, "3rd grade", NA, NA, "More than high scho~
#> $ Grade_Range <chr> NA, "College graduate or above", NA, NA, "College~
#> $ Marital_Status <chr> NA, NA, NA, NA, "Married", "Never married", "Marr~
#> $ Pregnant <chr> NA, NA, NA, NA, NA, "Not Pregnant", "Not Pregnant~
#> $ Household_Icome <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ Family_Income <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ Poverty_Income_Ratio <dbl> 0.86, 5.00, 1.47, 0.57, 5.00, 1.21, NA, 0.53, NA,~
%>%
DEMO_FEATURES show_query()
#> <SQL>
#> SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`, `USAF`, `Birth_Country`, `Grade_Level`, `Grade_Range`, `Marital_Status`, `Pregnant`, `Household_Icome`, `Family_Income`, `Poverty_Income_Ratio`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`, `USAF`, `Birth_Country`, `Grade_Level`, `Grade_Range`, `Marital_Status`, `Pregnant`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`, `USAF`, `Birth_Country`, `Grade_Level`, `Grade_Range`, `Marital_Status`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`, `USAF`, `Birth_Country`, `Grade_Level`, `Grade_Range`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`, `USAF`, `Birth_Country`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`, `USAF`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`
#> FROM (SELECT `SEQN`
#> FROM `DEMO`) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, CASE WHEN (((`RIDAGEYR`) IS NULL)) THEN (`RIDAGEMN` / 12.0) WHEN NOT(((`RIDAGEYR`) IS NULL)) THEN (`RIDAGEYR`) END AS `Age`, `Gender`
#> FROM (SELECT `SEQN`, `SDDSRVYR`, `RIDSTATR`, `RIDEXMON`, `RIAGENDR`, `RIDAGEYR`, `RIDAGEMN`, `RIDAGEEX`, `RIDRETH1`, `RIDRETH2`, `DMQMILIT`, `DMDBORN`, `DMDCITZN`, `DMDYRSUS`, `DMDEDUC3`, `DMDEDUC2`, `DMDEDUC`, `DMDSCHOL`, `DMDMARTL`, `DMDHHSIZ`, `INDHHINC`, `INDFMINC`, `INDFMPIR`, `RIDEXPRG`, `RIDPREG`, `DMDHRGND`, `DMDHRAGE`, `DMDHRBRN`, `DMDHREDU`, `DMDHRMAR`, `DMDHSEDU`, `WTINT2YR`, `WTINT4YR`, `WTMEC2YR`, `WTMEC4YR`, `SDMVPSU`, `SDMVSTRA`, `SDJ1REPN`, `DMAETHN`, `DMARACE`, `WTMREP01`, `WTMREP02`, `WTMREP03`, `WTMREP04`, `WTMREP05`, `WTMREP06`, `WTMREP07`, `WTMREP08`, `WTMREP09`, `WTMREP10`, `WTMREP11`, `WTMREP12`, `WTMREP13`, `WTMREP14`, `WTMREP15`, `WTMREP16`, `WTMREP17`, `WTMREP18`, `WTMREP19`, `WTMREP20`, `WTMREP21`, `WTMREP22`, `WTMREP23`, `WTMREP24`, `WTMREP25`, `WTMREP26`, `WTMREP27`, `WTMREP28`, `WTMREP29`, `WTMREP30`, `WTMREP31`, `WTMREP32`, `WTMREP33`, `WTMREP34`, `WTMREP35`, `WTMREP36`, `WTMREP37`, `WTMREP38`, `WTMREP39`, `WTMREP40`, `WTMREP41`, `WTMREP42`, `WTMREP43`, `WTMREP44`, `WTMREP45`, `WTMREP46`, `WTMREP47`, `WTMREP48`, `WTMREP49`, `WTMREP50`, `WTMREP51`, `WTMREP52`, `WTIREP01`, `WTIREP02`, `WTIREP03`, `WTIREP04`, `WTIREP05`, `WTIREP06`, `WTIREP07`, `WTIREP08`, `WTIREP09`, `WTIREP10`, `WTIREP11`, `WTIREP12`, `WTIREP13`, `WTIREP14`, `WTIREP15`, `WTIREP16`, `WTIREP17`, `WTIREP18`, `WTIREP19`, `WTIREP20`, `WTIREP21`, `WTIREP22`, `WTIREP23`, `WTIREP24`, `WTIREP25`, `WTIREP26`, `WTIREP27`, `WTIREP28`, `WTIREP29`, `WTIREP30`, `WTIREP31`, `WTIREP32`, `WTIREP33`, `WTIREP34`, `WTIREP35`, `WTIREP36`, `WTIREP37`, `WTIREP38`, `WTIREP39`, `WTIREP40`, `WTIREP41`, `WTIREP42`, `WTIREP43`, `WTIREP44`, `WTIREP45`, `WTIREP46`, `WTIREP47`, `WTIREP48`, `WTIREP49`, `WTIREP50`, `WTIREP51`, `WTIREP52`, `yr_range`, `SIALANG`, `SIAPROXY`, `SIAINTRP`, `FIALANG`, `FIAPROXY`, `FIAINTRP`, `MIALANG`, `MIAPROXY`, `MIAINTRP`, `AIALANG`, `DMDFMSIZ`, `DMDBORN2`, `INDHHIN2`, `INDFMIN2`, `DMDHRBR2`, `RIDRETH3`, `RIDEXAGY`, `RIDEXAGM`, `DMQMILIZ`, `DMQADFC`, `DMDBORN4`, `AIALANGA`, `DMDHHSZA`, `DMDHHSZB`, `DMDHHSZE`, `DMDHRBR4`, `DMDHRAGZ`, `DMDHREDZ`, `DMDHRMAZ`, `DMDHSEDZ`, CASE WHEN (`RIAGENDR` = 2.0) THEN ('Female') WHEN NOT(`RIAGENDR` = 2.0) THEN ('Male') END AS `Gender`
#> FROM `DEMO`)) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, CASE
#> WHEN (`RIDRETH1` = 1.0) THEN ('Mexican American')
#> WHEN (`RIDRETH1` = 2.0) THEN ('Other Hispanic')
#> WHEN (`RIDRETH1` = 3.0) THEN ('White')
#> WHEN (`RIDRETH1` = 4.0) THEN ('Black')
#> WHEN (`RIDRETH1` = 5.0) THEN ('Other')
#> WHEN (NOT((`RIDRETH1` IN (1.0, 2.0, 3.0, 4.0, 5.0)))) THEN (NULL)
#> END AS `Race`
#> FROM `DEMO`) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, CASE
#> WHEN ((`DMQMILIZ` = 1.0 OR `DMQMILIT` = 1.0)) THEN ('Yes')
#> WHEN ((`DMQMILIZ` = 2.0 OR `DMQMILIT` = 2.0)) THEN ('No')
#> WHEN (NOT((`DMQMILIZ` IN (1.0, 2.0) OR `DMQMILIT` IN (1.0, 2.0))) OR (((`DMQMILIZ`) IS NULL) AND ((`DMQMILIT`) IS NULL))) THEN (NULL)
#> END AS `USAF`
#> FROM `DEMO`) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, CASE
#> WHEN (`DMDBORN` = 1.0 OR `DMDBORN2` = 1.0 OR `DMDBORN4` = 1.0) THEN ('USA')
#> WHEN (`DMDBORN` = 2.0 OR `DMDBORN2` = 2.0 OR `DMDBORN4` = 2.0) THEN ('Other')
#> WHEN ((`DMDBORN` > 2.0 OR `DMDBORN2` > 2.0 OR `DMDBORN4` > 2.0) OR (((`DMDBORN`) IS NULL) AND ((`DMDBORN2`) IS NULL) AND ((`DMDBORN4`) IS NULL))) THEN (NULL)
#> END AS `Birth_Country`
#> FROM `DEMO`) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, `Grade_Level`, CASE
#> WHEN (`DMDEDUC2` = 1.0) THEN ('Less than 9th grade')
#> WHEN (`DMDEDUC2` = 2.0) THEN ('9-11th grade (Includes 12th grade with no diploma)')
#> WHEN (`DMDEDUC2` = 3.0) THEN ('High school graduate/GED or equivalent')
#> WHEN (`DMDEDUC2` = 4.0) THEN ('Some college or AA degree')
#> WHEN (`DMDEDUC2` = 5.0) THEN ('College graduate or above')
#> WHEN (`DMDEDUC2` > 5.0 OR ((`DMDEDUC2`) IS NULL)) THEN (NULL)
#> END AS `Grade_Range`
#> FROM (SELECT `SEQN`, `DMDEDUC2`, CASE
#> WHEN (`DMDEDUC3` = 0.0) THEN ('Never attended / kindergarten only')
#> WHEN (`DMDEDUC3` = 1.0) THEN ('1st grade')
#> WHEN (`DMDEDUC3` = 2.0) THEN ('2nd grade')
#> WHEN (`DMDEDUC3` = 3.0) THEN ('3rd grade')
#> WHEN (`DMDEDUC3` = 4.0) THEN ('4th grade')
#> WHEN (`DMDEDUC3` = 5.0) THEN ('5th grade')
#> WHEN (`DMDEDUC3` = 6.0) THEN ('6th grade')
#> WHEN (`DMDEDUC3` = 7.0) THEN ('7th grade')
#> WHEN (`DMDEDUC3` = 8.0) THEN ('8th grade')
#> WHEN (`DMDEDUC3` = 9.0) THEN ('9th grade')
#> WHEN (`DMDEDUC3` = 10.0) THEN ('10th grade')
#> WHEN (`DMDEDUC3` = 11.0) THEN ('11th grade')
#> WHEN (`DMDEDUC3` = 12.0) THEN ('12th grade, no diploma')
#> WHEN (`DMDEDUC3` = 13.0) THEN ('High school graduate')
#> WHEN (`DMDEDUC3` = 14.0) THEN ('GED or equivalent')
#> WHEN (`DMDEDUC3` = 15.0) THEN ('More than high school')
#> WHEN (`DMDEDUC3` = 55.0) THEN ('Less than 5th grade')
#> WHEN (`DMDEDUC3` = 66.0) THEN ('Less than 9th grade')
#> WHEN (`DMDEDUC3` = 77.0) THEN ('Refused')
#> WHEN (`DMDEDUC3` = 99.0) THEN ('Don''t Know')
#> WHEN (((`DMDEDUC3`) IS NULL) OR `DMDEDUC3` > 66.0) THEN (NULL)
#> END AS `Grade_Level`
#> FROM `DEMO`)) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, CASE
#> WHEN (`DMDMARTL` = 1.0) THEN ('Married')
#> WHEN (`DMDMARTL` = 2.0) THEN ('Widowed')
#> WHEN (`DMDMARTL` = 3.0) THEN ('Divorced')
#> WHEN (`DMDMARTL` = 4.0) THEN ('Separated')
#> WHEN (`DMDMARTL` = 5.0) THEN ('Never married')
#> WHEN (`DMDMARTL` = 6.0) THEN ('Living with partner')
#> WHEN (`DMDMARTL` > 6.0 OR ((`DMDMARTL`) IS NULL)) THEN (NULL)
#> END AS `Marital_Status`
#> FROM `DEMO`) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, CASE
#> WHEN (`RIDEXPRG` = 1.0) THEN ('Pregnant')
#> WHEN (`RIDEXPRG` = 2.0) THEN ('Not Pregnant')
#> WHEN (`RIDEXPRG` > 2.0 OR ((`RIDEXPRG`) IS NULL)) THEN (NULL)
#> END AS `Pregnant`
#> FROM `DEMO`) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, `Household_Icome`, `Family_Income`, CASE
#> WHEN (`INDFMPIR` <= 5.0) THEN (`INDFMPIR`)
#> WHEN (((`INDFMPIR`) IS NULL)) THEN (NULL)
#> END AS `Poverty_Income_Ratio`
#> FROM (SELECT `SEQN`, `INDFMPIR`, `Household_Icome`, CASE
#> WHEN (`INDFMIN2` = 1.0) THEN ('$ 0 to $ 4,999')
#> WHEN (`INDFMIN2` = 2.0) THEN ('$ 5,000 to $ 9,999')
#> WHEN (`INDFMIN2` = 3.0) THEN ('$10,000 to $14,999')
#> WHEN (`INDFMIN2` = 4.0) THEN ('$15,000 to $19,999')
#> WHEN (`INDFMIN2` = 5.0) THEN ('$20,000 to $24,999')
#> WHEN (`INDFMIN2` = 6.0) THEN ('$25,000 to $34,999')
#> WHEN (`INDFMIN2` = 7.0) THEN ('$35,000 to $44,999')
#> WHEN (`INDFMIN2` = 8.0) THEN ('$45,000 to $54,999')
#> WHEN (`INDFMIN2` = 9.0) THEN ('$55,000 to $64,999')
#> WHEN (`INDFMIN2` = 10.0) THEN ('$65,000 to $74,999')
#> WHEN (`INDFMIN2` = 12.0) THEN ('$20,000 and Over')
#> WHEN (`INDFMIN2` = 13.0) THEN ('Under $20,000')
#> WHEN (`INDFMIN2` = 14.0) THEN ('$75,000 to $99,999')
#> WHEN (`INDFMIN2` = 15.0) THEN ('$100,000 and Over')
#> WHEN (`INDFMIN2` > 15.0 OR ((`INDFMIN2`) IS NULL)) THEN (NULL)
#> END AS `Family_Income`
#> FROM (SELECT `SEQN`, `INDFMIN2`, `INDFMPIR`, CASE
#> WHEN (`INDHHIN2` = 1.0) THEN ('$ 0 to $ 4,999')
#> WHEN (`INDHHIN2` = 2.0) THEN ('$ 5,000 to $ 9,999')
#> WHEN (`INDHHIN2` = 3.0) THEN ('$10,000 to $14,999')
#> WHEN (`INDHHIN2` = 4.0) THEN ('$15,000 to $19,999')
#> WHEN (`INDHHIN2` = 5.0) THEN ('$20,000 to $24,999')
#> WHEN (`INDHHIN2` = 6.0) THEN ('$25,000 to $34,999')
#> WHEN (`INDHHIN2` = 7.0) THEN ('$35,000 to $44,999')
#> WHEN (`INDHHIN2` = 8.0) THEN ('$45,000 to $54,999')
#> WHEN (`INDHHIN2` = 9.0) THEN ('$55,000 to $64,999')
#> WHEN (`INDHHIN2` = 10.0) THEN ('$65,000 to $74,999')
#> WHEN (`INDHHIN2` = 12.0) THEN ('$20,000 and Over')
#> WHEN (`INDHHIN2` = 13.0) THEN ('Under $20,000')
#> WHEN (`INDHHIN2` = 14.0) THEN ('$75,000 to $99,999')
#> WHEN (`INDHHIN2` = 15.0) THEN ('$100,000 and Over')
#> WHEN (`INDHHIN2` > 15.0 OR ((`INDHHIN2`) IS NULL)) THEN (NULL)
#> END AS `Household_Icome`
#> FROM `DEMO`))) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
\(~\)
\(~\)
12.2 Lab Features
# browseURL('https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?Component=Laboratory&CycleBeginYear=2017')
<- tbl(NHANES_DB, "ALB_CR")
ALB_CR <- tbl(NHANES_DB, 'HDL')
HDL <- tbl(NHANES_DB, 'TRIGLY')
TRIGLY <- tbl(NHANES_DB, "TCHOL")
TCHOL <- tbl(NHANES_DB,"INS")
INS <- tbl(NHANES_DB, 'GLU')
GLU <- tbl(NHANES_DB,"UCPREG") UCPREG
12.2.1 Mapping Column Issues
<- DEMO %>%
prep_LABS_TBL select(SEQN, yr_range) %>%
left_join(ALB_CR) %>%
left_join(HDL) %>%
left_join(TRIGLY) %>%
left_join(TCHOL) %>%
left_join(INS,
by=c("SEQN", "yr_range"),
suffix=c('.TCHOL','')) %>%
left_join(GLU,
by=c("SEQN", "yr_range"),
suffix=c('.INS','.GLU')) %>%
left_join(UCPREG)
#> Joining, by = c("SEQN", "yr_range")
#> Joining, by = c("SEQN", "yr_range")
#> Joining, by = c("SEQN", "yr_range")
#> Joining, by = c("SEQN", "yr_range")
#> Joining, by = c("SEQN", "yr_range")
%>%
prep_LABS_TBL glimpse()
#> Rows: ??
#> Columns: 43
#> Database: sqlite 3.35.5 [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> $ SEQN <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ~
#> $ yr_range <chr> "1999-2000", "1999-2000", "1999-2000", "1999-2000", "19~
#> $ URXUMA <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URXUMS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URXUCR <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URXCRS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URDACT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URXUMA2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URDUMA2S <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URXUCR2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URDUCR2S <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URDACT2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URDUMALC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URDUCRLC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDHDD <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDHDDSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ WTSAF2YR.TCHOL <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXTR <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDTRSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDLDL <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDLDLSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXAPB <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDAPBSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDLDLM <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDLDMSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDLDLN <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDLDNSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXTC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDTCSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ WTSAF2YR.INS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXIN.INS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDINSI.INS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTHR.INS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTMN.INS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDINLC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ WTSAF2YR.GLU <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXGLU <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDGLUSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXIN.GLU <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDINSI.GLU <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTHR.GLU <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTMN.GLU <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URXPREG <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
<- colnames(prep_LABS_TBL)[!colnames(prep_LABS_TBL) %in% c('SEQN','yr_range')]
lab_features
lab_features#> [1] "URXUMA" "URXUMS" "URXUCR" "URXCRS"
#> [5] "URDACT" "URXUMA2" "URDUMA2S" "URXUCR2"
#> [9] "URDUCR2S" "URDACT2" "URDUMALC" "URDUCRLC"
#> [13] "LBDHDD" "LBDHDDSI" "WTSAF2YR.TCHOL" "LBXTR"
#> [17] "LBDTRSI" "LBDLDL" "LBDLDLSI" "LBXAPB"
#> [21] "LBDAPBSI" "LBDLDLM" "LBDLDMSI" "LBDLDLN"
#> [25] "LBDLDNSI" "LBXTC" "LBDTCSI" "WTSAF2YR.INS"
#> [29] "LBXIN.INS" "LBDINSI.INS" "PHAFSTHR.INS" "PHAFSTMN.INS"
#> [33] "LBDINLC" "WTSAF2YR.GLU" "LBXGLU" "LBDGLUSI"
#> [37] "LBXIN.GLU" "LBDINSI.GLU" "PHAFSTHR.GLU" "PHAFSTMN.GLU"
#> [41] "URXPREG"
<- tibble(lab_features)
lab_features_tibble
%>%
lab_features_tibble head()
#> # A tibble: 6 x 1
#> lab_features
#> <chr>
#> 1 URXUMA
#> 2 URXUMS
#> 3 URXUCR
#> 4 URXCRS
#> 5 URDACT
#> 6 URXUMA2
<- lab_features_tibble %>%
lab_features_tibble mutate(appears_in_other_tables = str_detect(lab_features , "[.]"))
%>%
lab_features_tibble head()
#> # A tibble: 6 x 2
#> lab_features appears_in_other_tables
#> <chr> <lgl>
#> 1 URXUMA FALSE
#> 2 URXUMS FALSE
#> 3 URXUCR FALSE
#> 4 URXCRS FALSE
#> 5 URDACT FALSE
#> 6 URXUMA2 FALSE
%>%
lab_features_tibble filter(appears_in_other_tables == TRUE) %>%
separate(lab_features, into=c('feature','SRC_TBL'), sep='[.]')
#> # A tibble: 11 x 3
#> feature SRC_TBL appears_in_other_tables
#> <chr> <chr> <lgl>
#> 1 WTSAF2YR TCHOL TRUE
#> 2 WTSAF2YR INS TRUE
#> 3 LBXIN INS TRUE
#> 4 LBDINSI INS TRUE
#> 5 PHAFSTHR INS TRUE
#> 6 PHAFSTMN INS TRUE
#> # ... with 5 more rows
<- tibble(lab_features) %>%
lab_features_tibble mutate(tmp = lab_features) %>%
separate(tmp,
into=c('feature','SRC_TBL'),
sep='[.]')
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 30 rows [1, 2, 3,
#> 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, ...].
lab_features_tibble#> # A tibble: 41 x 3
#> lab_features feature SRC_TBL
#> <chr> <chr> <chr>
#> 1 URXUMA URXUMA <NA>
#> 2 URXUMS URXUMS <NA>
#> 3 URXUCR URXUCR <NA>
#> 4 URXCRS URXCRS <NA>
#> 5 URDACT URDACT <NA>
#> 6 URXUMA2 URXUMA2 <NA>
#> # ... with 35 more rows
%>%
lab_features_tibble filter(feature == 'WTSAF2YR')
#> # A tibble: 3 x 3
#> lab_features feature SRC_TBL
#> <chr> <chr> <chr>
#> 1 WTSAF2YR.TCHOL WTSAF2YR TCHOL
#> 2 WTSAF2YR.INS WTSAF2YR INS
#> 3 WTSAF2YR.GLU WTSAF2YR GLU
<- lab_features_tibble %>%
lab_features_to_map filter(!is.na(SRC_TBL)) %>%
arrange(feature)
lab_features_to_map#> # A tibble: 11 x 3
#> lab_features feature SRC_TBL
#> <chr> <chr> <chr>
#> 1 LBDINSI.INS LBDINSI INS
#> 2 LBDINSI.GLU LBDINSI GLU
#> 3 LBXIN.INS LBXIN INS
#> 4 LBXIN.GLU LBXIN GLU
#> 5 PHAFSTHR.INS PHAFSTHR INS
#> 6 PHAFSTHR.GLU PHAFSTHR GLU
#> # ... with 5 more rows
%>%
lab_features_to_map filter(feature == 'WTSAF2YR')
#> # A tibble: 3 x 3
#> lab_features feature SRC_TBL
#> <chr> <chr> <chr>
#> 1 WTSAF2YR.TCHOL WTSAF2YR TCHOL
#> 2 WTSAF2YR.INS WTSAF2YR INS
#> 3 WTSAF2YR.GLU WTSAF2YR GLU
%>%
prep_LABS_TBL filter(!is.na(WTSAF2YR.TCHOL) | !is.na(WTSAF2YR.INS) | !is.na(WTSAF2YR.GLU)) %>%
select(contains('WTSAF2YR'))
#> # Source: lazy query [?? x 3]
#> # Database: sqlite 3.35.5
#> # [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> WTSAF2YR.TCHOL WTSAF2YR.INS WTSAF2YR.GLU
#> <dbl> <dbl> <dbl>
#> 1 0 NA 0
#> 2 67557. NA 67557.
#> 3 80194. NA 80194.
#> 4 15668. NA 15668.
#> 5 93400. NA 93400.
#> 6 0 NA 0
#> # ... with more rows
<- prep_LABS_TBL %>%
LABS_TBL mutate(WTSAF2YR = coalesce(WTSAF2YR.INS, WTSAF2YR.TCHOL , WTSAF2YR.GLU))
%>%
LABS_TBL select(yr_range, WTSAF2YR, WTSAF2YR.TCHOL, WTSAF2YR.INS, WTSAF2YR.GLU) %>%
filter(!is.na(WTSAF2YR))
#> # Source: lazy query [?? x 5]
#> # Database: sqlite 3.35.5
#> # [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> yr_range WTSAF2YR WTSAF2YR.TCHOL WTSAF2YR.INS WTSAF2YR.GLU
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 2005-2006 0 0 NA 0
#> 2 2005-2006 67557. 67557. NA 67557.
#> 3 2005-2006 80194. 80194. NA 80194.
#> 4 2005-2006 15668. 15668. NA 15668.
#> 5 2005-2006 93400. 93400. NA 93400.
#> 6 2005-2006 0 0 NA 0
#> # ... with more rows
%>%
lab_features_to_map filter(feature == 'LBDINSI')
#> # A tibble: 2 x 3
#> lab_features feature SRC_TBL
#> <chr> <chr> <chr>
#> 1 LBDINSI.INS LBDINSI INS
#> 2 LBDINSI.GLU LBDINSI GLU
%>%
prep_LABS_TBL mutate(LBDINSI = coalesce(LBDINSI.INS, LBDINSI.GLU)) %>%
filter(!is.na(LBDINSI.INS) | !is.na(LBDINSI.GLU) ) %>%
select(contains('LBDINSI'))
#> # Source: lazy query [?? x 3]
#> # Database: sqlite 3.35.5
#> # [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> LBDINSI.INS LBDINSI.GLU LBDINSI
#> <dbl> <dbl> <dbl>
#> 1 NA 60.2 60.2
#> 2 NA 53.9 53.9
#> 3 NA 67.6 67.6
#> 4 NA 87.1 87.1
#> 5 NA 62.8 62.8
#> 6 NA 23.5 23.5
#> # ... with more rows
<- prep_LABS_TBL %>%
check_LABS_TBL mutate(WTSAF2YR = coalesce(WTSAF2YR.TCHOL, WTSAF2YR.INS, WTSAF2YR.GLU)) %>%
mutate(PHAFSTMN = coalesce(PHAFSTMN.INS, PHAFSTMN.GLU)) %>%
mutate(PHAFSTHR = coalesce(PHAFSTHR.INS, PHAFSTHR.GLU)) %>%
mutate(LBXIN = coalesce(LBXIN.INS,LBXIN.GLU)) %>%
mutate(LBDINSI = coalesce(LBDINSI.INS,LBDINSI.GLU))
<- check_LABS_TBL %>%
check_maped_cols select(contains(c('WTSAF2YR','PHAFSTMN','PHAFSTHR','LBXIN','LBDINSI'))) %>%
colnames()
check_maped_cols#> [1] "WTSAF2YR.TCHOL" "WTSAF2YR.INS" "WTSAF2YR.GLU" "WTSAF2YR"
#> [5] "PHAFSTMN.INS" "PHAFSTMN.GLU" "PHAFSTMN" "PHAFSTHR.INS"
#> [9] "PHAFSTHR.GLU" "PHAFSTHR" "LBXIN.INS" "LBXIN.GLU"
#> [13] "LBXIN" "LBDINSI.INS" "LBDINSI.GLU" "LBDINSI"
%>%
check_LABS_TBL select(all_of(check_maped_cols)) %>%
filter_at(.vars=check_maped_cols, any_vars(!is.na(.))) %>%
glimpse()
#> Rows: ??
#> Columns: 16
#> Database: sqlite 3.35.5 [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> $ WTSAF2YR.TCHOL <dbl> 0.000, 67556.810, 80193.962, 15668.017, 93399.539, 0.00~
#> $ WTSAF2YR.INS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ WTSAF2YR.GLU <dbl> 0.000, 67556.810, 80193.962, 15668.017, 93399.539, 0.00~
#> $ WTSAF2YR <dbl> 0.000, 67556.810, 80193.962, 15668.017, 93399.539, 0.00~
#> $ PHAFSTMN.INS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTMN.GLU <dbl> 3, 9, 29, 32, 35, NA, NA, 54, 17, 54, 33, 0, 27, 35, 22~
#> $ PHAFSTMN <dbl> 3, 9, 29, 32, 35, NA, NA, 54, 17, 54, 33, 0, 27, 35, 22~
#> $ PHAFSTHR.INS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTHR.GLU <dbl> 10, 14, 11, 12, 14, NA, NA, 9, 3, 10, 11, 14, 13, 9, 14~
#> $ PHAFSTHR <dbl> 10, 14, 11, 12, 14, NA, NA, 9, 3, 10, 11, 14, 13, 9, 14~
#> $ LBXIN.INS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXIN.GLU <dbl> NA, 10.03, 8.99, 11.27, 14.51, NA, NA, 10.46, 3.91, 6.0~
#> $ LBXIN <dbl> NA, 10.03, 8.99, 11.27, 14.51, NA, NA, 10.46, 3.91, 6.0~
#> $ LBDINSI.INS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDINSI.GLU <dbl> NA, 60.18, 53.94, 67.62, 87.06, NA, NA, 62.76, 23.46, 3~
#> $ LBDINSI <dbl> NA, 60.18, 53.94, 67.62, 87.06, NA, NA, 62.76, 23.46, 3~
12.2.2 Lab Features Final
<- check_LABS_TBL %>%
LABS_TBL select(-WTSAF2YR.TCHOL, -WTSAF2YR.INS, -WTSAF2YR.GLU) %>%
select(-PHAFSTMN.INS, -PHAFSTMN.GLU) %>%
select(-PHAFSTHR.INS, -PHAFSTHR.GLU) %>%
select(-LBXIN.INS , -LBXIN.GLU) %>%
select(-LBDINSI.INS , -LBDINSI.GLU)
%>%
LABS_TBL filter_at(vars(lab_features_tibble$feature), any_vars(!is.na(.))) %>%
glimpse()
#> Rows: ??
#> Columns: 37
#> Database: sqlite 3.35.5 [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> $ SEQN <dbl> 31128, 31129, 31130, 31131, 31132, 31133, 31134, 31137, 31139~
#> $ yr_range <chr> "2005-2006", "2005-2006", "2005-2006", "2005-2006", "2005-200~
#> $ URXUMA <dbl> 82.1, 17.8, NA, 18.0, 6.5, 25.2, 10.1, 287.5, 5.9, 17.5, 5.1,~
#> $ URXUMS <dbl> 82.1, 17.8, NA, 18.0, 6.5, 25.2, 10.1, 287.5, 5.9, 17.5, 5.1,~
#> $ URXUCR <dbl> 291, 288, NA, 202, 162, 275, 140, 329, 125, 176, 91, 74, 143,~
#> $ URXCRS <dbl> 25724, 25459, NA, 17857, 14321, 24310, 12376, 29084, 11050, 1~
#> $ URDACT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URXUMA2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URDUMA2S <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URXUCR2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URDUCR2S <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URDACT2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URDUMALC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URDUCRLC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBDHDD <dbl> 55, 46, NA, 39, 59, 54, 49, 45, NA, 57, NA, NA, 44, 45, 48, 4~
#> $ LBDHDDSI <dbl> 1.42, 1.19, NA, 1.01, 1.53, 1.40, 1.27, 1.16, NA, 1.47, NA, N~
#> $ LBXTR <dbl> NA, NA, NA, 86, 65, 61, 195, NA, NA, NA, NA, NA, NA, NA, NA, ~
#> $ LBDTRSI <dbl> NA, NA, NA, 0.971, 0.734, 0.689, 2.202, NA, NA, NA, NA, NA, N~
#> $ LBDLDL <dbl> NA, NA, NA, 49, 75, 81, 98, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBDLDLSI <dbl> NA, NA, NA, 1.267, 1.940, 2.095, 2.534, NA, NA, NA, NA, NA, N~
#> $ LBXAPB <dbl> NA, NA, NA, 50, 75, 75, 111, NA, NA, NA, NA, NA, NA, NA, NA, ~
#> $ LBDAPBSI <dbl> NA, NA, NA, 0.50, 0.75, 0.75, 1.11, NA, NA, NA, NA, NA, NA, N~
#> $ LBDLDLM <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBDLDMSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBDLDLN <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBDLDNSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBXTC <dbl> 129, 170, NA, 105, 147, 147, 186, 129, NA, 141, NA, NA, 108, ~
#> $ LBDTCSI <dbl> 3.34, 4.40, NA, 2.72, 3.80, 3.80, 4.81, 3.34, NA, 3.65, NA, N~
#> $ LBDINLC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBXGLU <dbl> NA, NA, NA, 90, 157, 84, 100, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDGLUSI <dbl> NA, NA, NA, 4.996, 8.715, 4.663, 5.551, NA, NA, NA, NA, NA, N~
#> $ URXPREG <dbl> NA, NA, NA, 2, NA, 2, NA, 2, 2, NA, NA, NA, NA, NA, NA, NA, N~
#> $ WTSAF2YR <dbl> NA, NA, 0.00, 67556.81, 80193.96, 15668.02, 93399.54, NA, 0.0~
#> $ PHAFSTMN <dbl> NA, NA, 3, 9, 29, 32, 35, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTHR <dbl> NA, NA, 10, 14, 11, 12, 14, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBXIN <dbl> NA, NA, NA, 10.03, 8.99, 11.27, 14.51, NA, NA, NA, NA, NA, NA~
#> $ LBDINSI <dbl> NA, NA, NA, 60.18, 53.94, 67.62, 87.06, NA, NA, NA, NA, NA, N~
\(~\)
\(~\)
12.3 Examination Features
# browseURL('https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?Component=Examination&CycleBeginYear=2017')
<- tbl(NHANES_DB, 'BPX')
BPX <- tbl(NHANES_DB,'BMX') BMX
12.3.1 Examination Features
<- DEMO %>%
MEASURES_TBL select(SEQN, yr_range) %>%
left_join(BPX) %>%
left_join(BMX)
#> Joining, by = c("SEQN", "yr_range")
#> Joining, by = c("SEQN", "yr_range")
%>%
MEASURES_TBL glimpse()
#> Rows: ??
#> Columns: 84
#> Database: sqlite 3.35.5 [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> $ SEQN <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18~
#> $ yr_range <chr> "1999-2000", "1999-2000", "1999-2000", "1999-2000", "1999-200~
#> $ PEASCST1 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
#> $ PEASCTM1 <dbl> 151, 764, 571, 47, 694, 581, 852, 807, 579, 584, 751, 1169, 7~
#> $ PEASCCT1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
#> $ BPXCHR <dbl> 110, NA, NA, 108, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ BPQ150A <dbl> NA, 2, 2, NA, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, NA, NA, NA,~
#> $ BPQ150B <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,~
#> $ BPQ150C <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,~
#> $ BPQ150D <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,~
#> $ BPAARM <dbl> NA, 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, NA,~
#> $ BPACSZ <dbl> NA, 3, 2, NA, 4, 2, 4, 2, 3, 4, 3, 4, 2, 3, 3, 3, NA, NA, NA,~
#> $ BPXPLS <dbl> NA, 68, 104, NA, 66, 70, 58, 96, 84, 58, 62, 64, 102, 72, 68,~
#> $ BPXDB <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BPXPULS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
#> $ BPXPTY <dbl> NA, 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, NA,~
#> $ BPXML1 <dbl> NA, 130, 140, NA, 140, 140, 160, 130, 140, 190, 130, 200, 170~
#> $ BPXSY1 <dbl> NA, 106, 110, NA, 122, 116, 130, NA, 104, 152, 110, 182, 140,~
#> $ BPXDI1 <dbl> NA, 58, 60, NA, 82, 64, 78, NA, 60, 98, 52, 108, 78, 56, 68, ~
#> $ BPAEN1 <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,~
#> $ BPXSY2 <dbl> NA, 98, 104, NA, 122, 116, 122, 106, 114, 142, 110, 172, 130,~
#> $ BPXDI2 <dbl> NA, 56, 64, NA, 84, 60, 80, 44, 52, 94, 52, 98, 62, NA, 68, 5~
#> $ BPAEN2 <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,~
#> $ BPXSY3 <dbl> NA, 98, 112, NA, 122, 112, 124, 100, 110, 142, 104, 176, 130,~
#> $ BPXDI3 <dbl> NA, 56, 62, NA, 82, 80, 82, 48, 48, 96, 50, 100, 70, 58, 70, ~
#> $ BPAEN3 <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,~
#> $ BPXSY4 <dbl> NA, NA, NA, NA, NA, NA, NA, 96, NA, NA, NA, NA, NA, 138, NA, ~
#> $ BPXDI4 <dbl> NA, NA, NA, NA, NA, NA, NA, 56, NA, NA, NA, NA, NA, 64, NA, N~
#> $ BPAEN4 <dbl> NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA, 2, NA, NA,~
#> $ BPXSAR <dbl> NA, 98, 108, NA, 122, 114, 123, 98, 112, 142, 107, 174, 130, ~
#> $ BPXDAR <dbl> NA, 56, 63, NA, 83, 70, 81, 52, 50, 95, 51, 99, 66, 61, 69, 6~
#> $ BMAEXLEN <dbl> 289, 376, 199, 170, 277, 252, 303, 257, 178, 218, 311, 274, 2~
#> $ BMAEXSTS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
#> $ BMAEXCMT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXWT <dbl> 12.5, 75.4, 32.9, 13.3, 92.5, 59.2, 78.0, 40.7, 45.5, 111.8, ~
#> $ BMIWT <dbl> 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3,~
#> $ BMXRECUM <dbl> 93.2, NA, NA, 87.1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMIRECUM <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXHEAD <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMIHEAD <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXHT <dbl> 91.6, 174.0, 136.6, NA, 178.3, 162.0, 162.9, 162.0, 156.9, 19~
#> $ BMIHT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXBMI <dbl> 14.90, 24.90, 17.63, NA, 29.10, 22.56, 29.39, 15.51, 18.48, 3~
#> $ BMXLEG <dbl> NA, NA, 34.1, NA, 45.2, 39.7, 43.0, 41.6, 42.2, 46.6, 40.5, 4~
#> $ BMILEG <dbl> NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
#> $ BMXCALF <dbl> NA, 37.5, 29.0, NA, 42.6, 34.0, 37.2, 30.0, 32.7, 43.7, 37.8,~
#> $ BMICALF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXARML <dbl> 18.6, 38.2, 25.5, 20.4, 39.7, 34.5, 38.1, 36.7, 32.0, 43.0, 3~
#> $ BMIARML <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXARMC <dbl> 15.2, 29.8, 19.7, 16.4, 35.8, 26.0, 31.7, 20.1, 22.1, 37.6, 2~
#> $ BMIARMC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXWAIST <dbl> 45.7, 98.0, 64.7, NA, 99.9, 81.6, 90.7, 64.1, 64.6, 108.0, 76~
#> $ BMIWAIST <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA~
#> $ BMXTHICR <dbl> NA, NA, 38.2, NA, 56.2, 47.0, 55.7, 39.1, 45.3, 64.0, 39.9, 5~
#> $ BMITHICR <dbl> NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA,~
#> $ BMXTRI <dbl> 8.3, 12.8, 8.0, 10.8, 17.4, 20.3, 26.4, 6.2, 11.1, 15.5, 8.0,~
#> $ BMITRI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXSUB <dbl> 5.0, 20.4, 6.5, 10.2, 38.6, 16.8, 34.2, 5.2, 7.6, 26.6, 7.8, ~
#> $ BMISUB <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, 1, 1, NA, ~
#> $ BMAAMP <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2~
#> $ BMAUREXT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMAUPREL <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMAULEXT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMAUPLEL <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMALOREX <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMALORKN <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMALLEXT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMALLKNE <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDSTATS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDRECUF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDSUBF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDTHICF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDLEGF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDARMLF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDCALFF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDBMIC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXSAD1 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXSAD2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXSAD3 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXSAD4 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDAVSAD <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDSADCM <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXHIP <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMIHIP <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
This concludes the section on engineering features.