12 Additional Feature Engineering

12.1 Demographic Features

library('tidyverse')
#> -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
#> v ggplot2 3.3.3     v purrr   0.3.4
#> v tibble  3.1.2     v dplyr   1.0.6
#> v tidyr   1.1.3     v stringr 1.4.0
#> v readr   1.4.0     v forcats 0.5.1
#> -- Conflicts ------------------------------------------ tidyverse_conflicts() --
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag()    masks stats::lag()

source('C:/Users/jkyle/Documents/GitHub/Jeff_Data_Wrangling/Functions/connect_help.R')
#> Warning: `src_sqlite()` was deprecated in dplyr 1.0.0.
#> Please use `tbl()` directly with a database connection

NHANES_Tables
#>  [1] "ACQ"      "ALB_CR"   "ALQ"      "ALQY"     "AUQ"      "BIOPRO"  
#>  [7] "BMX"      "BPQ"      "BPX"      "CBC"      "CDQ"      "CMV"     
#> [13] "COT"      "CRCO"     "DBQ"      "DEMO"     "DEQ"      "DIQ"     
#> [19] "DLQ"      "DPQ"      "DR1IFF"   "DR1TOT"   "DR2IFF"   "DR2TOT"  
#> [25] "DRXFCD"   "DS1IDS"   "DS1TOT"   "DS2TOT"   "DSBI"     "DSII"    
#> [31] "DSQIDS"   "DSQTOT"   "DUQ"      "DXX"      "DXXFEM"   "DXXSPN"  
#> [37] "Ds2IDS"   "ECQ"      "FASTQX"   "FERTIN"   "FETIB"    "FOLATE"  
#> [43] "FOLFMS"   "GHB"      "GLU"      "HDL"      "HEPA"     "HEPBD"   
#> [49] "HEPC"     "HEPE"     "HEQ"      "HIQ"      "HIV"      "HOQ"     
#> [55] "HSCRP"    "HSQ"      "HSV"      "HUQ"      "IHGEM"    "IMQ"     
#> [61] "INS"      "KIQ"      "LUX"      "MCQ"      "OCQ"      "OHQ"     
#> [67] "OHXDEN"   "OHXREF"   "OSQ"      "PAQ"      "PAQY"     "PBCD"    
#> [73] "PFAS"     "PFQ"      "PUQMEC"   "RHQ"      "RXQASA"   "RXQ_DRUG"
#> [79] "RXQ_RX"   "SLQ"      "SMQ"      "SMQFAM"   "SMQRTU"   "SMQSHS"  
#> [85] "SSPFAS"   "SXQ"      "TCHOL"    "TFR"      "TRIGLY"   "UCFLOW"  
#> [91] "UCM"      "UCPREG"   "UHG"      "UIO"      "UNI"      "VIC"     
#> [97] "VOCWB"    "VTQ"

\(~\)


\(~\)

# Open_NHANES_table_help('DEMO')

12.1.1 Age & Gender

AGE_GENDER_TBL <- DEMO %>%
  mutate(Gender = if_else(RIAGENDR == 2 , "Female", "Male")) %>%
  mutate(Age = if_else(is.na(RIDAGEYR), RIDAGEMN/12 , RIDAGEYR)) %>%
  select(SEQN, Age, Gender) 

AGE_GENDER_TBL %>%
  glimpse()
#> Rows: ??
#> Columns: 3
#> Database: sqlite 3.35.5 [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> $ SEQN   <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ~
#> $ Age    <dbl> 2, 77, 10, 1, 49, 19, 59, 13, 11, 43, 15, 37, 70, 81, 38, 85, 2~
#> $ Gender <chr> "Female", "Male", "Female", "Male", "Male", "Female", "Female",~

12.1.2 Race/Hispanic origin

We see that only RIDRETH1 is available in both 2017-2018 and 1999-2000:

RACE_TBL <- DEMO %>%
  select(SEQN, RIDRETH1) %>%
  mutate(Race = 
           case_when(RIDRETH1 == 1 ~ "Mexican American",
                     RIDRETH1 == 2 ~ "Other Hispanic",
                     RIDRETH1 == 3 ~ "White",
                     RIDRETH1 == 4 ~ "Black",
                     RIDRETH1 == 5 ~ "Other",
                     !(RIDRETH1 %in% c(1,2,3,4,5)) ~ NA )) %>%
  select(-RIDRETH1)

RACE_TBL %>% 
  head()
#> # Source:   lazy query [?? x 2]
#> # Database: sqlite 3.35.5
#> #   [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#>    SEQN Race 
#>   <dbl> <chr>
#> 1     1 Black
#> 2     2 White
#> 3     3 White
#> 4     4 Black
#> 5     5 White
#> 6     6 Other

12.1.3 US Armed Forces

We see that only DMQMILIZ is available since 2011-2012, prior to 2011, the veteran status information (released in the variable DMQMILIT in the demographics file in 1999-2010)

USFA_TBL <- DEMO %>%
  select(SEQN, DMQMILIZ, DMQMILIT) %>%
  mutate(USAF = 
           case_when((DMQMILIZ == 1 | DMQMILIT == 1) ~ "Yes",
                     (DMQMILIZ == 2 | DMQMILIT == 2) ~ "No",
                     !(DMQMILIZ %in% c(1,2) | DMQMILIT %in% c(1,2) ) |(is.na(DMQMILIZ) & is.na(DMQMILIT))   ~ NA )) %>%
  select(-DMQMILIZ, -DMQMILIT)

USFA_TBL %>% 
  head()
#> # Source:   lazy query [?? x 2]
#> # Database: sqlite 3.35.5
#> #   [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#>    SEQN USAF 
#>   <dbl> <chr>
#> 1     1 <NA> 
#> 2     2 Yes  
#> 3     3 <NA> 
#> 4     4 <NA> 
#> 5     5 Yes  
#> 6     6 No

12.1.4 Country of Birth

Due to the concerns of disclosure risk, starting in 2011, country of birth was recoded into two categories: 1) Born in 50 U.S. states or Washington, DC; and 2) Born in other countries, including U.S. territories.

BIRTH_COUNTRY_TBL <- DEMO %>%
  select(SEQN, DMDBORN, DMDBORN2, DMDBORN4 ) %>%
  mutate(Birth_Country = 
           case_when(DMDBORN == 1 | DMDBORN2 == 1 | DMDBORN4 == 1 ~ "USA",
                     DMDBORN == 2 | DMDBORN2 == 2 | DMDBORN4 == 2 ~ "Other",
                     (DMDBORN > 2 | DMDBORN2 > 2 | DMDBORN4 > 2 ) | 
                       (is.na(DMDBORN) & is.na(DMDBORN2) & is.na(DMDBORN4)) ~ NA  )) %>%
  select(SEQN, Birth_Country)

BIRTH_COUNTRY_TBL %>% 
  head()
#> # Source:   lazy query [?? x 2]
#> # Database: sqlite 3.35.5
#> #   [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#>    SEQN Birth_Country
#>   <dbl> <chr>        
#> 1     1 USA          
#> 2     2 USA          
#> 3     3 <NA>         
#> 4     4 USA          
#> 5     5 USA          
#> 6     6 USA

12.1.5 Education

EDUCATION_TBL <- DEMO %>%
  select(SEQN, DMDEDUC3, DMDEDUC2) %>%
  mutate(Grade_Level = 
           case_when(DMDEDUC3 ==0~"Never attended / kindergarten only",
DMDEDUC3 ==1~"1st grade",
DMDEDUC3 ==2~"2nd grade",
DMDEDUC3 ==3~"3rd grade",
DMDEDUC3 ==4~"4th grade",
DMDEDUC3 ==5~"5th grade",
DMDEDUC3 ==6~"6th grade",
DMDEDUC3 ==7~"7th grade",
DMDEDUC3 ==8~"8th grade",
DMDEDUC3 ==9~"9th grade",
DMDEDUC3 ==10~"10th grade",
DMDEDUC3 ==11~"11th grade",
DMDEDUC3 ==12~"12th grade, no diploma",
DMDEDUC3 ==13~"High school graduate",
DMDEDUC3 ==14~"GED or equivalent",
DMDEDUC3 ==15~"More than high school",
DMDEDUC3 ==55~"Less than 5th grade",
DMDEDUC3 ==66~"Less than 9th grade",
DMDEDUC3 ==77~"Refused",
DMDEDUC3 ==99~"Don't Know",
is.na(DMDEDUC3) |  DMDEDUC3 > 66 ~ NA)) %>%
  select(-DMDEDUC3) %>%
  mutate(Grade_Range = 
           case_when(DMDEDUC2 ==1~"Less than 9th grade",
DMDEDUC2 ==2~"9-11th grade (Includes 12th grade with no diploma)",
DMDEDUC2 ==3~"High school graduate/GED or equivalent",
DMDEDUC2 ==4~"Some college or AA degree",
DMDEDUC2 ==5~"College graduate or above",
DMDEDUC2 > 5 | is.na(DMDEDUC2)  ~ NA )) %>%
  select(-DMDEDUC2)

EDUCATION_TBL %>% 
  head()
#> # Source:   lazy query [?? x 3]
#> # Database: sqlite 3.35.5
#> #   [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#>    SEQN Grade_Level           Grade_Range              
#>   <dbl> <chr>                 <chr>                    
#> 1     1 <NA>                  <NA>                     
#> 2     2 <NA>                  College graduate or above
#> 3     3 3rd grade             <NA>                     
#> 4     4 <NA>                  <NA>                     
#> 5     5 <NA>                  College graduate or above
#> 6     6 More than high school <NA>

12.1.6 Marital Status


MARITAL_STATUS_TBL <- DEMO %>%
  select(SEQN, DMDMARTL) %>%
  mutate(Marital_Status = 
           case_when(DMDMARTL ==1~"Married",
DMDMARTL ==2~"Widowed",
DMDMARTL ==3~"Divorced",
DMDMARTL ==4~"Separated",
DMDMARTL ==5~"Never married",
DMDMARTL ==6~"Living with partner",
DMDMARTL >6 | is.na(DMDMARTL) ~ NA )) %>%
  select(-DMDMARTL)

MARITAL_STATUS_TBL %>% 
  head()
#> # Source:   lazy query [?? x 2]
#> # Database: sqlite 3.35.5
#> #   [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#>    SEQN Marital_Status
#>   <dbl> <chr>         
#> 1     1 <NA>          
#> 2     2 <NA>          
#> 3     3 <NA>          
#> 4     4 <NA>          
#> 5     5 Married       
#> 6     6 Never married

12.1.7 Pregnancy Status

PREG_STATUS_TBL <- DEMO %>%
  select(SEQN, RIDEXPRG) %>%
  mutate(Pregnant = 
           case_when(RIDEXPRG == 1 ~ "Pregnant",
                     RIDEXPRG == 2 ~ "Not Pregnant",
                     RIDEXPRG > 2 | is.na(RIDEXPRG) ~ NA )) %>%
  select(-RIDEXPRG)

PREG_STATUS_TBL %>% 
  head()
#> # Source:   lazy query [?? x 2]
#> # Database: sqlite 3.35.5
#> #   [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#>    SEQN Pregnant    
#>   <dbl> <chr>       
#> 1     1 <NA>        
#> 2     2 <NA>        
#> 3     3 <NA>        
#> 4     4 <NA>        
#> 5     5 <NA>        
#> 6     6 Not Pregnant

12.1.8 Income

INCOME_TBL <- DEMO %>%
  select(SEQN, INDHHIN2, INDFMIN2, INDFMPIR) %>%
  mutate(Household_Icome = case_when(INDHHIN2 ==1~"$ 0 to $ 4,999",
INDHHIN2 ==2~"$ 5,000 to $ 9,999",
INDHHIN2 ==3~"$10,000 to $14,999",
INDHHIN2 ==4~"$15,000 to $19,999",
INDHHIN2 ==5~"$20,000 to $24,999",
INDHHIN2 ==6~"$25,000 to $34,999",
INDHHIN2 ==7~"$35,000 to $44,999",
INDHHIN2 ==8~"$45,000 to $54,999",
INDHHIN2 ==9~"$55,000 to $64,999",
INDHHIN2 ==10~"$65,000 to $74,999",
INDHHIN2 ==12~"$20,000 and Over",
INDHHIN2 ==13~"Under $20,000",
INDHHIN2 ==14~"$75,000 to $99,999",
INDHHIN2 ==15~"$100,000 and Over",
INDHHIN2 > 15 | is.na(INDHHIN2) ~ NA)) %>%
  select(-INDHHIN2) %>%
  mutate(Family_Income = case_when(INDFMIN2 ==1~"$ 0 to $ 4,999",
INDFMIN2 ==2~"$ 5,000 to $ 9,999",
INDFMIN2 ==3~"$10,000 to $14,999",
INDFMIN2 ==4~"$15,000 to $19,999",
INDFMIN2 ==5~"$20,000 to $24,999",
INDFMIN2 ==6~"$25,000 to $34,999",
INDFMIN2 ==7~"$35,000 to $44,999",
INDFMIN2 ==8~"$45,000 to $54,999",
INDFMIN2 ==9~"$55,000 to $64,999",
INDFMIN2 ==10~"$65,000 to $74,999",
INDFMIN2 ==12~"$20,000 and Over",
INDFMIN2 ==13~"Under $20,000",
INDFMIN2 ==14~"$75,000 to $99,999",
INDFMIN2 ==15~"$100,000 and Over",
INDFMIN2 > 15 | is.na(INDFMIN2) ~ NA )) %>%
  select(-INDFMIN2) %>%
  mutate(Poverty_Income_Ratio = 
           case_when(INDFMPIR <= 5 ~ INDFMPIR,
                     is.na(INDFMPIR) ~ NA)) %>%
  select(-INDFMPIR)


INCOME_TBL %>% 
  head()
#> # Source:   lazy query [?? x 4]
#> # Database: sqlite 3.35.5
#> #   [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#>    SEQN Household_Icome Family_Income Poverty_Income_Ratio
#>   <dbl> <lgl>           <lgl>                        <dbl>
#> 1     1 NA              NA                            0.86
#> 2     2 NA              NA                            5   
#> 3     3 NA              NA                            1.47
#> 4     4 NA              NA                            0.57
#> 5     5 NA              NA                            5   
#> 6     6 NA              NA                            1.21

12.1.9 Demographic Features Final

DEMO_FEATURES <- DEMO %>%
  select(SEQN) %>%
  left_join(AGE_GENDER_TBL) %>%
  left_join(RACE_TBL) %>%
  left_join(USFA_TBL) %>%
  left_join(BIRTH_COUNTRY_TBL) %>%
  left_join(EDUCATION_TBL) %>%
  left_join(MARITAL_STATUS_TBL) %>%
  left_join(PREG_STATUS_TBL) %>%
  left_join(INCOME_TBL)
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
#> Joining, by = "SEQN"
DEMO_FEATURES %>%
  glimpse()
#> Rows: ??
#> Columns: 13
#> Database: sqlite 3.35.5 [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> $ SEQN                 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15~
#> $ Age                  <dbl> 2, 77, 10, 1, 49, 19, 59, 13, 11, 43, 15, 37, 70,~
#> $ Gender               <chr> "Female", "Male", "Female", "Male", "Male", "Fema~
#> $ Race                 <chr> "Black", "White", "White", "Black", "White", "Oth~
#> $ USAF                 <chr> NA, "Yes", NA, NA, "Yes", "No", "No", NA, NA, "Ye~
#> $ Birth_Country        <chr> "USA", "USA", NA, "USA", "USA", "USA", "USA", "US~
#> $ Grade_Level          <chr> NA, NA, "3rd grade", NA, NA, "More than high scho~
#> $ Grade_Range          <chr> NA, "College graduate or above", NA, NA, "College~
#> $ Marital_Status       <chr> NA, NA, NA, NA, "Married", "Never married", "Marr~
#> $ Pregnant             <chr> NA, NA, NA, NA, NA, "Not Pregnant", "Not Pregnant~
#> $ Household_Icome      <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ Family_Income        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ Poverty_Income_Ratio <dbl> 0.86, 5.00, 1.47, 0.57, 5.00, 1.21, NA, 0.53, NA,~
DEMO_FEATURES %>%
  show_query()
#> <SQL>
#> SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`, `USAF`, `Birth_Country`, `Grade_Level`, `Grade_Range`, `Marital_Status`, `Pregnant`, `Household_Icome`, `Family_Income`, `Poverty_Income_Ratio`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`, `USAF`, `Birth_Country`, `Grade_Level`, `Grade_Range`, `Marital_Status`, `Pregnant`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`, `USAF`, `Birth_Country`, `Grade_Level`, `Grade_Range`, `Marital_Status`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`, `USAF`, `Birth_Country`, `Grade_Level`, `Grade_Range`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`, `USAF`, `Birth_Country`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`, `USAF`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`, `Race`
#> FROM (SELECT `LHS`.`SEQN` AS `SEQN`, `Age`, `Gender`
#> FROM (SELECT `SEQN`
#> FROM `DEMO`) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, CASE WHEN (((`RIDAGEYR`) IS NULL)) THEN (`RIDAGEMN` / 12.0) WHEN NOT(((`RIDAGEYR`) IS NULL)) THEN (`RIDAGEYR`) END AS `Age`, `Gender`
#> FROM (SELECT `SEQN`, `SDDSRVYR`, `RIDSTATR`, `RIDEXMON`, `RIAGENDR`, `RIDAGEYR`, `RIDAGEMN`, `RIDAGEEX`, `RIDRETH1`, `RIDRETH2`, `DMQMILIT`, `DMDBORN`, `DMDCITZN`, `DMDYRSUS`, `DMDEDUC3`, `DMDEDUC2`, `DMDEDUC`, `DMDSCHOL`, `DMDMARTL`, `DMDHHSIZ`, `INDHHINC`, `INDFMINC`, `INDFMPIR`, `RIDEXPRG`, `RIDPREG`, `DMDHRGND`, `DMDHRAGE`, `DMDHRBRN`, `DMDHREDU`, `DMDHRMAR`, `DMDHSEDU`, `WTINT2YR`, `WTINT4YR`, `WTMEC2YR`, `WTMEC4YR`, `SDMVPSU`, `SDMVSTRA`, `SDJ1REPN`, `DMAETHN`, `DMARACE`, `WTMREP01`, `WTMREP02`, `WTMREP03`, `WTMREP04`, `WTMREP05`, `WTMREP06`, `WTMREP07`, `WTMREP08`, `WTMREP09`, `WTMREP10`, `WTMREP11`, `WTMREP12`, `WTMREP13`, `WTMREP14`, `WTMREP15`, `WTMREP16`, `WTMREP17`, `WTMREP18`, `WTMREP19`, `WTMREP20`, `WTMREP21`, `WTMREP22`, `WTMREP23`, `WTMREP24`, `WTMREP25`, `WTMREP26`, `WTMREP27`, `WTMREP28`, `WTMREP29`, `WTMREP30`, `WTMREP31`, `WTMREP32`, `WTMREP33`, `WTMREP34`, `WTMREP35`, `WTMREP36`, `WTMREP37`, `WTMREP38`, `WTMREP39`, `WTMREP40`, `WTMREP41`, `WTMREP42`, `WTMREP43`, `WTMREP44`, `WTMREP45`, `WTMREP46`, `WTMREP47`, `WTMREP48`, `WTMREP49`, `WTMREP50`, `WTMREP51`, `WTMREP52`, `WTIREP01`, `WTIREP02`, `WTIREP03`, `WTIREP04`, `WTIREP05`, `WTIREP06`, `WTIREP07`, `WTIREP08`, `WTIREP09`, `WTIREP10`, `WTIREP11`, `WTIREP12`, `WTIREP13`, `WTIREP14`, `WTIREP15`, `WTIREP16`, `WTIREP17`, `WTIREP18`, `WTIREP19`, `WTIREP20`, `WTIREP21`, `WTIREP22`, `WTIREP23`, `WTIREP24`, `WTIREP25`, `WTIREP26`, `WTIREP27`, `WTIREP28`, `WTIREP29`, `WTIREP30`, `WTIREP31`, `WTIREP32`, `WTIREP33`, `WTIREP34`, `WTIREP35`, `WTIREP36`, `WTIREP37`, `WTIREP38`, `WTIREP39`, `WTIREP40`, `WTIREP41`, `WTIREP42`, `WTIREP43`, `WTIREP44`, `WTIREP45`, `WTIREP46`, `WTIREP47`, `WTIREP48`, `WTIREP49`, `WTIREP50`, `WTIREP51`, `WTIREP52`, `yr_range`, `SIALANG`, `SIAPROXY`, `SIAINTRP`, `FIALANG`, `FIAPROXY`, `FIAINTRP`, `MIALANG`, `MIAPROXY`, `MIAINTRP`, `AIALANG`, `DMDFMSIZ`, `DMDBORN2`, `INDHHIN2`, `INDFMIN2`, `DMDHRBR2`, `RIDRETH3`, `RIDEXAGY`, `RIDEXAGM`, `DMQMILIZ`, `DMQADFC`, `DMDBORN4`, `AIALANGA`, `DMDHHSZA`, `DMDHHSZB`, `DMDHHSZE`, `DMDHRBR4`, `DMDHRAGZ`, `DMDHREDZ`, `DMDHRMAZ`, `DMDHSEDZ`, CASE WHEN (`RIAGENDR` = 2.0) THEN ('Female') WHEN NOT(`RIAGENDR` = 2.0) THEN ('Male') END AS `Gender`
#> FROM `DEMO`)) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, CASE
#> WHEN (`RIDRETH1` = 1.0) THEN ('Mexican American')
#> WHEN (`RIDRETH1` = 2.0) THEN ('Other Hispanic')
#> WHEN (`RIDRETH1` = 3.0) THEN ('White')
#> WHEN (`RIDRETH1` = 4.0) THEN ('Black')
#> WHEN (`RIDRETH1` = 5.0) THEN ('Other')
#> WHEN (NOT((`RIDRETH1` IN (1.0, 2.0, 3.0, 4.0, 5.0)))) THEN (NULL)
#> END AS `Race`
#> FROM `DEMO`) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, CASE
#> WHEN ((`DMQMILIZ` = 1.0 OR `DMQMILIT` = 1.0)) THEN ('Yes')
#> WHEN ((`DMQMILIZ` = 2.0 OR `DMQMILIT` = 2.0)) THEN ('No')
#> WHEN (NOT((`DMQMILIZ` IN (1.0, 2.0) OR `DMQMILIT` IN (1.0, 2.0))) OR (((`DMQMILIZ`) IS NULL) AND ((`DMQMILIT`) IS NULL))) THEN (NULL)
#> END AS `USAF`
#> FROM `DEMO`) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, CASE
#> WHEN (`DMDBORN` = 1.0 OR `DMDBORN2` = 1.0 OR `DMDBORN4` = 1.0) THEN ('USA')
#> WHEN (`DMDBORN` = 2.0 OR `DMDBORN2` = 2.0 OR `DMDBORN4` = 2.0) THEN ('Other')
#> WHEN ((`DMDBORN` > 2.0 OR `DMDBORN2` > 2.0 OR `DMDBORN4` > 2.0) OR (((`DMDBORN`) IS NULL) AND ((`DMDBORN2`) IS NULL) AND ((`DMDBORN4`) IS NULL))) THEN (NULL)
#> END AS `Birth_Country`
#> FROM `DEMO`) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, `Grade_Level`, CASE
#> WHEN (`DMDEDUC2` = 1.0) THEN ('Less than 9th grade')
#> WHEN (`DMDEDUC2` = 2.0) THEN ('9-11th grade (Includes 12th grade with no diploma)')
#> WHEN (`DMDEDUC2` = 3.0) THEN ('High school graduate/GED or equivalent')
#> WHEN (`DMDEDUC2` = 4.0) THEN ('Some college or AA degree')
#> WHEN (`DMDEDUC2` = 5.0) THEN ('College graduate or above')
#> WHEN (`DMDEDUC2` > 5.0 OR ((`DMDEDUC2`) IS NULL)) THEN (NULL)
#> END AS `Grade_Range`
#> FROM (SELECT `SEQN`, `DMDEDUC2`, CASE
#> WHEN (`DMDEDUC3` = 0.0) THEN ('Never attended / kindergarten only')
#> WHEN (`DMDEDUC3` = 1.0) THEN ('1st grade')
#> WHEN (`DMDEDUC3` = 2.0) THEN ('2nd grade')
#> WHEN (`DMDEDUC3` = 3.0) THEN ('3rd grade')
#> WHEN (`DMDEDUC3` = 4.0) THEN ('4th grade')
#> WHEN (`DMDEDUC3` = 5.0) THEN ('5th grade')
#> WHEN (`DMDEDUC3` = 6.0) THEN ('6th grade')
#> WHEN (`DMDEDUC3` = 7.0) THEN ('7th grade')
#> WHEN (`DMDEDUC3` = 8.0) THEN ('8th grade')
#> WHEN (`DMDEDUC3` = 9.0) THEN ('9th grade')
#> WHEN (`DMDEDUC3` = 10.0) THEN ('10th grade')
#> WHEN (`DMDEDUC3` = 11.0) THEN ('11th grade')
#> WHEN (`DMDEDUC3` = 12.0) THEN ('12th grade, no diploma')
#> WHEN (`DMDEDUC3` = 13.0) THEN ('High school graduate')
#> WHEN (`DMDEDUC3` = 14.0) THEN ('GED or equivalent')
#> WHEN (`DMDEDUC3` = 15.0) THEN ('More than high school')
#> WHEN (`DMDEDUC3` = 55.0) THEN ('Less than 5th grade')
#> WHEN (`DMDEDUC3` = 66.0) THEN ('Less than 9th grade')
#> WHEN (`DMDEDUC3` = 77.0) THEN ('Refused')
#> WHEN (`DMDEDUC3` = 99.0) THEN ('Don''t Know')
#> WHEN (((`DMDEDUC3`) IS NULL) OR `DMDEDUC3` > 66.0) THEN (NULL)
#> END AS `Grade_Level`
#> FROM `DEMO`)) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, CASE
#> WHEN (`DMDMARTL` = 1.0) THEN ('Married')
#> WHEN (`DMDMARTL` = 2.0) THEN ('Widowed')
#> WHEN (`DMDMARTL` = 3.0) THEN ('Divorced')
#> WHEN (`DMDMARTL` = 4.0) THEN ('Separated')
#> WHEN (`DMDMARTL` = 5.0) THEN ('Never married')
#> WHEN (`DMDMARTL` = 6.0) THEN ('Living with partner')
#> WHEN (`DMDMARTL` > 6.0 OR ((`DMDMARTL`) IS NULL)) THEN (NULL)
#> END AS `Marital_Status`
#> FROM `DEMO`) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, CASE
#> WHEN (`RIDEXPRG` = 1.0) THEN ('Pregnant')
#> WHEN (`RIDEXPRG` = 2.0) THEN ('Not Pregnant')
#> WHEN (`RIDEXPRG` > 2.0 OR ((`RIDEXPRG`) IS NULL)) THEN (NULL)
#> END AS `Pregnant`
#> FROM `DEMO`) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)
#> ) AS `LHS`
#> LEFT JOIN (SELECT `SEQN`, `Household_Icome`, `Family_Income`, CASE
#> WHEN (`INDFMPIR` <= 5.0) THEN (`INDFMPIR`)
#> WHEN (((`INDFMPIR`) IS NULL)) THEN (NULL)
#> END AS `Poverty_Income_Ratio`
#> FROM (SELECT `SEQN`, `INDFMPIR`, `Household_Icome`, CASE
#> WHEN (`INDFMIN2` = 1.0) THEN ('$ 0 to $ 4,999')
#> WHEN (`INDFMIN2` = 2.0) THEN ('$ 5,000 to $ 9,999')
#> WHEN (`INDFMIN2` = 3.0) THEN ('$10,000 to $14,999')
#> WHEN (`INDFMIN2` = 4.0) THEN ('$15,000 to $19,999')
#> WHEN (`INDFMIN2` = 5.0) THEN ('$20,000 to $24,999')
#> WHEN (`INDFMIN2` = 6.0) THEN ('$25,000 to $34,999')
#> WHEN (`INDFMIN2` = 7.0) THEN ('$35,000 to $44,999')
#> WHEN (`INDFMIN2` = 8.0) THEN ('$45,000 to $54,999')
#> WHEN (`INDFMIN2` = 9.0) THEN ('$55,000 to $64,999')
#> WHEN (`INDFMIN2` = 10.0) THEN ('$65,000 to $74,999')
#> WHEN (`INDFMIN2` = 12.0) THEN ('$20,000 and Over')
#> WHEN (`INDFMIN2` = 13.0) THEN ('Under $20,000')
#> WHEN (`INDFMIN2` = 14.0) THEN ('$75,000 to $99,999')
#> WHEN (`INDFMIN2` = 15.0) THEN ('$100,000 and Over')
#> WHEN (`INDFMIN2` > 15.0 OR ((`INDFMIN2`) IS NULL)) THEN (NULL)
#> END AS `Family_Income`
#> FROM (SELECT `SEQN`, `INDFMIN2`, `INDFMPIR`, CASE
#> WHEN (`INDHHIN2` = 1.0) THEN ('$ 0 to $ 4,999')
#> WHEN (`INDHHIN2` = 2.0) THEN ('$ 5,000 to $ 9,999')
#> WHEN (`INDHHIN2` = 3.0) THEN ('$10,000 to $14,999')
#> WHEN (`INDHHIN2` = 4.0) THEN ('$15,000 to $19,999')
#> WHEN (`INDHHIN2` = 5.0) THEN ('$20,000 to $24,999')
#> WHEN (`INDHHIN2` = 6.0) THEN ('$25,000 to $34,999')
#> WHEN (`INDHHIN2` = 7.0) THEN ('$35,000 to $44,999')
#> WHEN (`INDHHIN2` = 8.0) THEN ('$45,000 to $54,999')
#> WHEN (`INDHHIN2` = 9.0) THEN ('$55,000 to $64,999')
#> WHEN (`INDHHIN2` = 10.0) THEN ('$65,000 to $74,999')
#> WHEN (`INDHHIN2` = 12.0) THEN ('$20,000 and Over')
#> WHEN (`INDHHIN2` = 13.0) THEN ('Under $20,000')
#> WHEN (`INDHHIN2` = 14.0) THEN ('$75,000 to $99,999')
#> WHEN (`INDHHIN2` = 15.0) THEN ('$100,000 and Over')
#> WHEN (`INDHHIN2` > 15.0 OR ((`INDHHIN2`) IS NULL)) THEN (NULL)
#> END AS `Household_Icome`
#> FROM `DEMO`))) AS `RHS`
#> ON (`LHS`.`SEQN` = `RHS`.`SEQN`)

\(~\)


\(~\)

12.2 Lab Features

# browseURL('https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?Component=Laboratory&CycleBeginYear=2017')
ALB_CR <- tbl(NHANES_DB, "ALB_CR")
HDL <- tbl(NHANES_DB, 'HDL')
TRIGLY <- tbl(NHANES_DB, 'TRIGLY')
TCHOL <- tbl(NHANES_DB, "TCHOL")
INS <- tbl(NHANES_DB,"INS") 
GLU <- tbl(NHANES_DB, 'GLU')
UCPREG <- tbl(NHANES_DB,"UCPREG")

12.2.1 Mapping Column Issues

prep_LABS_TBL <- DEMO %>%
  select(SEQN, yr_range) %>%
  left_join(ALB_CR) %>%
  left_join(HDL) %>%
  left_join(TRIGLY) %>%
  left_join(TCHOL) %>%
  left_join(INS, 
            by=c("SEQN", "yr_range"),
            suffix=c('.TCHOL',''))  %>%
  left_join(GLU, 
            by=c("SEQN", "yr_range"),
            suffix=c('.INS','.GLU')) %>%
  left_join(UCPREG)
#> Joining, by = c("SEQN", "yr_range")
#> Joining, by = c("SEQN", "yr_range")
#> Joining, by = c("SEQN", "yr_range")
#> Joining, by = c("SEQN", "yr_range")
#> Joining, by = c("SEQN", "yr_range")
prep_LABS_TBL %>%
  glimpse()
#> Rows: ??
#> Columns: 43
#> Database: sqlite 3.35.5 [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> $ SEQN           <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ~
#> $ yr_range       <chr> "1999-2000", "1999-2000", "1999-2000", "1999-2000", "19~
#> $ URXUMA         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URXUMS         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URXUCR         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URXCRS         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URDACT         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URXUMA2        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URDUMA2S       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URXUCR2        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URDUCR2S       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URDACT2        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URDUMALC       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URDUCRLC       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDHDD         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDHDDSI       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ WTSAF2YR.TCHOL <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXTR          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDTRSI        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDLDL         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDLDLSI       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXAPB         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDAPBSI       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDLDLM        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDLDMSI       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDLDLN        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDLDNSI       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXTC          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDTCSI        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ WTSAF2YR.INS   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXIN.INS      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDINSI.INS    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTHR.INS   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTMN.INS   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDINLC        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ WTSAF2YR.GLU   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXGLU         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDGLUSI       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXIN.GLU      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDINSI.GLU    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTHR.GLU   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTMN.GLU   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ URXPREG        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
lab_features <- colnames(prep_LABS_TBL)[!colnames(prep_LABS_TBL) %in% c('SEQN','yr_range')] 
lab_features
#>  [1] "URXUMA"         "URXUMS"         "URXUCR"         "URXCRS"        
#>  [5] "URDACT"         "URXUMA2"        "URDUMA2S"       "URXUCR2"       
#>  [9] "URDUCR2S"       "URDACT2"        "URDUMALC"       "URDUCRLC"      
#> [13] "LBDHDD"         "LBDHDDSI"       "WTSAF2YR.TCHOL" "LBXTR"         
#> [17] "LBDTRSI"        "LBDLDL"         "LBDLDLSI"       "LBXAPB"        
#> [21] "LBDAPBSI"       "LBDLDLM"        "LBDLDMSI"       "LBDLDLN"       
#> [25] "LBDLDNSI"       "LBXTC"          "LBDTCSI"        "WTSAF2YR.INS"  
#> [29] "LBXIN.INS"      "LBDINSI.INS"    "PHAFSTHR.INS"   "PHAFSTMN.INS"  
#> [33] "LBDINLC"        "WTSAF2YR.GLU"   "LBXGLU"         "LBDGLUSI"      
#> [37] "LBXIN.GLU"      "LBDINSI.GLU"    "PHAFSTHR.GLU"   "PHAFSTMN.GLU"  
#> [41] "URXPREG"
lab_features_tibble <- tibble(lab_features)

lab_features_tibble %>%
  head()
#> # A tibble: 6 x 1
#>   lab_features
#>   <chr>       
#> 1 URXUMA      
#> 2 URXUMS      
#> 3 URXUCR      
#> 4 URXCRS      
#> 5 URDACT      
#> 6 URXUMA2
lab_features_tibble <- lab_features_tibble %>%
  mutate(appears_in_other_tables = str_detect(lab_features , "[.]"))

lab_features_tibble  %>%
  head()
#> # A tibble: 6 x 2
#>   lab_features appears_in_other_tables
#>   <chr>        <lgl>                  
#> 1 URXUMA       FALSE                  
#> 2 URXUMS       FALSE                  
#> 3 URXUCR       FALSE                  
#> 4 URXCRS       FALSE                  
#> 5 URDACT       FALSE                  
#> 6 URXUMA2      FALSE
lab_features_tibble %>%
  filter(appears_in_other_tables == TRUE) %>%
  separate(lab_features, into=c('feature','SRC_TBL'), sep='[.]')
#> # A tibble: 11 x 3
#>   feature  SRC_TBL appears_in_other_tables
#>   <chr>    <chr>   <lgl>                  
#> 1 WTSAF2YR TCHOL   TRUE                   
#> 2 WTSAF2YR INS     TRUE                   
#> 3 LBXIN    INS     TRUE                   
#> 4 LBDINSI  INS     TRUE                   
#> 5 PHAFSTHR INS     TRUE                   
#> 6 PHAFSTMN INS     TRUE                   
#> # ... with 5 more rows
lab_features_tibble <- tibble(lab_features) %>%
  mutate(tmp = lab_features) %>%
  separate(tmp, 
           into=c('feature','SRC_TBL'), 
           sep='[.]')
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 30 rows [1, 2, 3,
#> 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, ...].

lab_features_tibble
#> # A tibble: 41 x 3
#>   lab_features feature SRC_TBL
#>   <chr>        <chr>   <chr>  
#> 1 URXUMA       URXUMA  <NA>   
#> 2 URXUMS       URXUMS  <NA>   
#> 3 URXUCR       URXUCR  <NA>   
#> 4 URXCRS       URXCRS  <NA>   
#> 5 URDACT       URDACT  <NA>   
#> 6 URXUMA2      URXUMA2 <NA>   
#> # ... with 35 more rows
lab_features_tibble %>%
  filter(feature == 'WTSAF2YR')
#> # A tibble: 3 x 3
#>   lab_features   feature  SRC_TBL
#>   <chr>          <chr>    <chr>  
#> 1 WTSAF2YR.TCHOL WTSAF2YR TCHOL  
#> 2 WTSAF2YR.INS   WTSAF2YR INS    
#> 3 WTSAF2YR.GLU   WTSAF2YR GLU
lab_features_to_map <- lab_features_tibble %>%
  filter(!is.na(SRC_TBL)) %>%
  arrange(feature)

lab_features_to_map
#> # A tibble: 11 x 3
#>   lab_features feature  SRC_TBL
#>   <chr>        <chr>    <chr>  
#> 1 LBDINSI.INS  LBDINSI  INS    
#> 2 LBDINSI.GLU  LBDINSI  GLU    
#> 3 LBXIN.INS    LBXIN    INS    
#> 4 LBXIN.GLU    LBXIN    GLU    
#> 5 PHAFSTHR.INS PHAFSTHR INS    
#> 6 PHAFSTHR.GLU PHAFSTHR GLU    
#> # ... with 5 more rows
lab_features_to_map %>%
  filter(feature == 'WTSAF2YR')
#> # A tibble: 3 x 3
#>   lab_features   feature  SRC_TBL
#>   <chr>          <chr>    <chr>  
#> 1 WTSAF2YR.TCHOL WTSAF2YR TCHOL  
#> 2 WTSAF2YR.INS   WTSAF2YR INS    
#> 3 WTSAF2YR.GLU   WTSAF2YR GLU
prep_LABS_TBL %>%
  filter(!is.na(WTSAF2YR.TCHOL) | !is.na(WTSAF2YR.INS) | !is.na(WTSAF2YR.GLU)) %>%
  select(contains('WTSAF2YR'))
#> # Source:   lazy query [?? x 3]
#> # Database: sqlite 3.35.5
#> #   [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#>   WTSAF2YR.TCHOL WTSAF2YR.INS WTSAF2YR.GLU
#>            <dbl>        <dbl>        <dbl>
#> 1             0            NA           0 
#> 2         67557.           NA       67557.
#> 3         80194.           NA       80194.
#> 4         15668.           NA       15668.
#> 5         93400.           NA       93400.
#> 6             0            NA           0 
#> # ... with more rows
LABS_TBL <- prep_LABS_TBL %>%
  mutate(WTSAF2YR = coalesce(WTSAF2YR.INS, WTSAF2YR.TCHOL , WTSAF2YR.GLU))

LABS_TBL %>%
  select(yr_range, WTSAF2YR, WTSAF2YR.TCHOL, WTSAF2YR.INS, WTSAF2YR.GLU) %>%
  filter(!is.na(WTSAF2YR))
#> # Source:   lazy query [?? x 5]
#> # Database: sqlite 3.35.5
#> #   [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#>   yr_range  WTSAF2YR WTSAF2YR.TCHOL WTSAF2YR.INS WTSAF2YR.GLU
#>   <chr>        <dbl>          <dbl>        <dbl>        <dbl>
#> 1 2005-2006       0              0            NA           0 
#> 2 2005-2006   67557.         67557.           NA       67557.
#> 3 2005-2006   80194.         80194.           NA       80194.
#> 4 2005-2006   15668.         15668.           NA       15668.
#> 5 2005-2006   93400.         93400.           NA       93400.
#> 6 2005-2006       0              0            NA           0 
#> # ... with more rows
lab_features_to_map %>%
  filter(feature == 'LBDINSI')
#> # A tibble: 2 x 3
#>   lab_features feature SRC_TBL
#>   <chr>        <chr>   <chr>  
#> 1 LBDINSI.INS  LBDINSI INS    
#> 2 LBDINSI.GLU  LBDINSI GLU
prep_LABS_TBL %>%
  mutate(LBDINSI = coalesce(LBDINSI.INS, LBDINSI.GLU)) %>%
  filter(!is.na(LBDINSI.INS) | !is.na(LBDINSI.GLU) ) %>%
  select(contains('LBDINSI'))
#> # Source:   lazy query [?? x 3]
#> # Database: sqlite 3.35.5
#> #   [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#>   LBDINSI.INS LBDINSI.GLU LBDINSI
#>         <dbl>       <dbl>   <dbl>
#> 1          NA        60.2    60.2
#> 2          NA        53.9    53.9
#> 3          NA        67.6    67.6
#> 4          NA        87.1    87.1
#> 5          NA        62.8    62.8
#> 6          NA        23.5    23.5
#> # ... with more rows
check_LABS_TBL <- prep_LABS_TBL %>%
  mutate(WTSAF2YR = coalesce(WTSAF2YR.TCHOL, WTSAF2YR.INS, WTSAF2YR.GLU)) %>%
  mutate(PHAFSTMN = coalesce(PHAFSTMN.INS, PHAFSTMN.GLU)) %>%
  mutate(PHAFSTHR = coalesce(PHAFSTHR.INS, PHAFSTHR.GLU)) %>%
  mutate(LBXIN = coalesce(LBXIN.INS,LBXIN.GLU)) %>%
  mutate(LBDINSI = coalesce(LBDINSI.INS,LBDINSI.GLU))

check_maped_cols <- check_LABS_TBL %>%
  select(contains(c('WTSAF2YR','PHAFSTMN','PHAFSTHR','LBXIN','LBDINSI'))) %>%
  colnames()

check_maped_cols
#>  [1] "WTSAF2YR.TCHOL" "WTSAF2YR.INS"   "WTSAF2YR.GLU"   "WTSAF2YR"      
#>  [5] "PHAFSTMN.INS"   "PHAFSTMN.GLU"   "PHAFSTMN"       "PHAFSTHR.INS"  
#>  [9] "PHAFSTHR.GLU"   "PHAFSTHR"       "LBXIN.INS"      "LBXIN.GLU"     
#> [13] "LBXIN"          "LBDINSI.INS"    "LBDINSI.GLU"    "LBDINSI"
check_LABS_TBL %>%
  select(all_of(check_maped_cols)) %>%
  filter_at(.vars=check_maped_cols, any_vars(!is.na(.))) %>%
  glimpse()
#> Rows: ??
#> Columns: 16
#> Database: sqlite 3.35.5 [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> $ WTSAF2YR.TCHOL <dbl> 0.000, 67556.810, 80193.962, 15668.017, 93399.539, 0.00~
#> $ WTSAF2YR.INS   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ WTSAF2YR.GLU   <dbl> 0.000, 67556.810, 80193.962, 15668.017, 93399.539, 0.00~
#> $ WTSAF2YR       <dbl> 0.000, 67556.810, 80193.962, 15668.017, 93399.539, 0.00~
#> $ PHAFSTMN.INS   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTMN.GLU   <dbl> 3, 9, 29, 32, 35, NA, NA, 54, 17, 54, 33, 0, 27, 35, 22~
#> $ PHAFSTMN       <dbl> 3, 9, 29, 32, 35, NA, NA, 54, 17, 54, 33, 0, 27, 35, 22~
#> $ PHAFSTHR.INS   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTHR.GLU   <dbl> 10, 14, 11, 12, 14, NA, NA, 9, 3, 10, 11, 14, 13, 9, 14~
#> $ PHAFSTHR       <dbl> 10, 14, 11, 12, 14, NA, NA, 9, 3, 10, 11, 14, 13, 9, 14~
#> $ LBXIN.INS      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBXIN.GLU      <dbl> NA, 10.03, 8.99, 11.27, 14.51, NA, NA, 10.46, 3.91, 6.0~
#> $ LBXIN          <dbl> NA, 10.03, 8.99, 11.27, 14.51, NA, NA, 10.46, 3.91, 6.0~
#> $ LBDINSI.INS    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDINSI.GLU    <dbl> NA, 60.18, 53.94, 67.62, 87.06, NA, NA, 62.76, 23.46, 3~
#> $ LBDINSI        <dbl> NA, 60.18, 53.94, 67.62, 87.06, NA, NA, 62.76, 23.46, 3~

12.2.2 Lab Features Final

LABS_TBL <- check_LABS_TBL %>% 
  select(-WTSAF2YR.TCHOL, -WTSAF2YR.INS, -WTSAF2YR.GLU) %>%
  select(-PHAFSTMN.INS, -PHAFSTMN.GLU) %>%
  select(-PHAFSTHR.INS, -PHAFSTHR.GLU) %>%
  select(-LBXIN.INS , -LBXIN.GLU) %>%
  select(-LBDINSI.INS , -LBDINSI.GLU)


LABS_TBL  %>%
  filter_at(vars(lab_features_tibble$feature), any_vars(!is.na(.))) %>%
  glimpse()
#> Rows: ??
#> Columns: 37
#> Database: sqlite 3.35.5 [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> $ SEQN     <dbl> 31128, 31129, 31130, 31131, 31132, 31133, 31134, 31137, 31139~
#> $ yr_range <chr> "2005-2006", "2005-2006", "2005-2006", "2005-2006", "2005-200~
#> $ URXUMA   <dbl> 82.1, 17.8, NA, 18.0, 6.5, 25.2, 10.1, 287.5, 5.9, 17.5, 5.1,~
#> $ URXUMS   <dbl> 82.1, 17.8, NA, 18.0, 6.5, 25.2, 10.1, 287.5, 5.9, 17.5, 5.1,~
#> $ URXUCR   <dbl> 291, 288, NA, 202, 162, 275, 140, 329, 125, 176, 91, 74, 143,~
#> $ URXCRS   <dbl> 25724, 25459, NA, 17857, 14321, 24310, 12376, 29084, 11050, 1~
#> $ URDACT   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URXUMA2  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URDUMA2S <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URXUCR2  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URDUCR2S <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URDACT2  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URDUMALC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ URDUCRLC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBDHDD   <dbl> 55, 46, NA, 39, 59, 54, 49, 45, NA, 57, NA, NA, 44, 45, 48, 4~
#> $ LBDHDDSI <dbl> 1.42, 1.19, NA, 1.01, 1.53, 1.40, 1.27, 1.16, NA, 1.47, NA, N~
#> $ LBXTR    <dbl> NA, NA, NA, 86, 65, 61, 195, NA, NA, NA, NA, NA, NA, NA, NA, ~
#> $ LBDTRSI  <dbl> NA, NA, NA, 0.971, 0.734, 0.689, 2.202, NA, NA, NA, NA, NA, N~
#> $ LBDLDL   <dbl> NA, NA, NA, 49, 75, 81, 98, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBDLDLSI <dbl> NA, NA, NA, 1.267, 1.940, 2.095, 2.534, NA, NA, NA, NA, NA, N~
#> $ LBXAPB   <dbl> NA, NA, NA, 50, 75, 75, 111, NA, NA, NA, NA, NA, NA, NA, NA, ~
#> $ LBDAPBSI <dbl> NA, NA, NA, 0.50, 0.75, 0.75, 1.11, NA, NA, NA, NA, NA, NA, N~
#> $ LBDLDLM  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBDLDMSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBDLDLN  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBDLDNSI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBXTC    <dbl> 129, 170, NA, 105, 147, 147, 186, 129, NA, 141, NA, NA, 108, ~
#> $ LBDTCSI  <dbl> 3.34, 4.40, NA, 2.72, 3.80, 3.80, 4.81, 3.34, NA, 3.65, NA, N~
#> $ LBDINLC  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBXGLU   <dbl> NA, NA, NA, 90, 157, 84, 100, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ LBDGLUSI <dbl> NA, NA, NA, 4.996, 8.715, 4.663, 5.551, NA, NA, NA, NA, NA, N~
#> $ URXPREG  <dbl> NA, NA, NA, 2, NA, 2, NA, 2, 2, NA, NA, NA, NA, NA, NA, NA, N~
#> $ WTSAF2YR <dbl> NA, NA, 0.00, 67556.81, 80193.96, 15668.02, 93399.54, NA, 0.0~
#> $ PHAFSTMN <dbl> NA, NA, 3, 9, 29, 32, 35, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ PHAFSTHR <dbl> NA, NA, 10, 14, 11, 12, 14, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ LBXIN    <dbl> NA, NA, NA, 10.03, 8.99, 11.27, 14.51, NA, NA, NA, NA, NA, NA~
#> $ LBDINSI  <dbl> NA, NA, NA, 60.18, 53.94, 67.62, 87.06, NA, NA, NA, NA, NA, N~

\(~\)


\(~\)

12.3 Examination Features

# browseURL('https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?Component=Examination&CycleBeginYear=2017')
BPX <- tbl(NHANES_DB, 'BPX')
BMX <- tbl(NHANES_DB,'BMX')

12.3.1 Examination Features

MEASURES_TBL <- DEMO %>%
  select(SEQN, yr_range) %>%
  left_join(BPX) %>%
  left_join(BMX)
#> Joining, by = c("SEQN", "yr_range")
#> Joining, by = c("SEQN", "yr_range")

MEASURES_TBL %>%
  glimpse()
#> Rows: ??
#> Columns: 84
#> Database: sqlite 3.35.5 [C:\Users\jkyle\Documents\GitHub\Jeff_Data_Mgt_R_PRV\DATA\sql_db\NHANES_DB.db]
#> $ SEQN     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18~
#> $ yr_range <chr> "1999-2000", "1999-2000", "1999-2000", "1999-2000", "1999-200~
#> $ PEASCST1 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
#> $ PEASCTM1 <dbl> 151, 764, 571, 47, 694, 581, 852, 807, 579, 584, 751, 1169, 7~
#> $ PEASCCT1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
#> $ BPXCHR   <dbl> 110, NA, NA, 108, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
#> $ BPQ150A  <dbl> NA, 2, 2, NA, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, NA, NA, NA,~
#> $ BPQ150B  <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,~
#> $ BPQ150C  <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,~
#> $ BPQ150D  <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,~
#> $ BPAARM   <dbl> NA, 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, NA,~
#> $ BPACSZ   <dbl> NA, 3, 2, NA, 4, 2, 4, 2, 3, 4, 3, 4, 2, 3, 3, 3, NA, NA, NA,~
#> $ BPXPLS   <dbl> NA, 68, 104, NA, 66, 70, 58, 96, 84, 58, 62, 64, 102, 72, 68,~
#> $ BPXDB    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BPXPULS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
#> $ BPXPTY   <dbl> NA, 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, NA,~
#> $ BPXML1   <dbl> NA, 130, 140, NA, 140, 140, 160, 130, 140, 190, 130, 200, 170~
#> $ BPXSY1   <dbl> NA, 106, 110, NA, 122, 116, 130, NA, 104, 152, 110, 182, 140,~
#> $ BPXDI1   <dbl> NA, 58, 60, NA, 82, 64, 78, NA, 60, 98, 52, 108, 78, 56, 68, ~
#> $ BPAEN1   <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,~
#> $ BPXSY2   <dbl> NA, 98, 104, NA, 122, 116, 122, 106, 114, 142, 110, 172, 130,~
#> $ BPXDI2   <dbl> NA, 56, 64, NA, 84, 60, 80, 44, 52, 94, 52, 98, 62, NA, 68, 5~
#> $ BPAEN2   <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,~
#> $ BPXSY3   <dbl> NA, 98, 112, NA, 122, 112, 124, 100, 110, 142, 104, 176, 130,~
#> $ BPXDI3   <dbl> NA, 56, 62, NA, 82, 80, 82, 48, 48, 96, 50, 100, 70, 58, 70, ~
#> $ BPAEN3   <dbl> NA, 2, 2, NA, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA,~
#> $ BPXSY4   <dbl> NA, NA, NA, NA, NA, NA, NA, 96, NA, NA, NA, NA, NA, 138, NA, ~
#> $ BPXDI4   <dbl> NA, NA, NA, NA, NA, NA, NA, 56, NA, NA, NA, NA, NA, 64, NA, N~
#> $ BPAEN4   <dbl> NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA, 2, NA, NA,~
#> $ BPXSAR   <dbl> NA, 98, 108, NA, 122, 114, 123, 98, 112, 142, 107, 174, 130, ~
#> $ BPXDAR   <dbl> NA, 56, 63, NA, 83, 70, 81, 52, 50, 95, 51, 99, 66, 61, 69, 6~
#> $ BMAEXLEN <dbl> 289, 376, 199, 170, 277, 252, 303, 257, 178, 218, 311, 274, 2~
#> $ BMAEXSTS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
#> $ BMAEXCMT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXWT    <dbl> 12.5, 75.4, 32.9, 13.3, 92.5, 59.2, 78.0, 40.7, 45.5, 111.8, ~
#> $ BMIWT    <dbl> 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3,~
#> $ BMXRECUM <dbl> 93.2, NA, NA, 87.1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMIRECUM <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXHEAD  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMIHEAD  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXHT    <dbl> 91.6, 174.0, 136.6, NA, 178.3, 162.0, 162.9, 162.0, 156.9, 19~
#> $ BMIHT    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXBMI   <dbl> 14.90, 24.90, 17.63, NA, 29.10, 22.56, 29.39, 15.51, 18.48, 3~
#> $ BMXLEG   <dbl> NA, NA, 34.1, NA, 45.2, 39.7, 43.0, 41.6, 42.2, 46.6, 40.5, 4~
#> $ BMILEG   <dbl> NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
#> $ BMXCALF  <dbl> NA, 37.5, 29.0, NA, 42.6, 34.0, 37.2, 30.0, 32.7, 43.7, 37.8,~
#> $ BMICALF  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXARML  <dbl> 18.6, 38.2, 25.5, 20.4, 39.7, 34.5, 38.1, 36.7, 32.0, 43.0, 3~
#> $ BMIARML  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXARMC  <dbl> 15.2, 29.8, 19.7, 16.4, 35.8, 26.0, 31.7, 20.1, 22.1, 37.6, 2~
#> $ BMIARMC  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXWAIST <dbl> 45.7, 98.0, 64.7, NA, 99.9, 81.6, 90.7, 64.1, 64.6, 108.0, 76~
#> $ BMIWAIST <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA~
#> $ BMXTHICR <dbl> NA, NA, 38.2, NA, 56.2, 47.0, 55.7, 39.1, 45.3, 64.0, 39.9, 5~
#> $ BMITHICR <dbl> NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA,~
#> $ BMXTRI   <dbl> 8.3, 12.8, 8.0, 10.8, 17.4, 20.3, 26.4, 6.2, 11.1, 15.5, 8.0,~
#> $ BMITRI   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXSUB   <dbl> 5.0, 20.4, 6.5, 10.2, 38.6, 16.8, 34.2, 5.2, 7.6, 26.6, 7.8, ~
#> $ BMISUB   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, 1, 1, NA, ~
#> $ BMAAMP   <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2~
#> $ BMAUREXT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMAUPREL <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMAULEXT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMAUPLEL <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMALOREX <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMALORKN <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMALLEXT <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMALLKNE <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDSTATS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDRECUF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDSUBF  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDTHICF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDLEGF  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDARMLF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDCALFF <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDBMIC  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXSAD1  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXSAD2  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXSAD3  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXSAD4  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDAVSAD <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMDSADCM <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMXHIP   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
#> $ BMIHIP   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~

This concludes the section on engineering features.