8 Data processing
8.1 Data validation
Table-based validation.
Functions to update & apply data validation tables, generate data with consistent nomenclature.
1. Labs & activity types
2. Activity media
3. Parameter names & fractions
4. Detection conditions
5. Detection limit types
6. Unit conversion factors
7. Apply screening tables and subset to accepted data
8.1.1 Update detection condition / limit name tables
updateDetCondLimTables(results=irdata$merged_results, detquantlim=irdata$detquantlim, translation_wb=translation_wb,
detConditionTable_startRow=2, detLimitTypeTable_startRow=2)
## [1] "No new ResultDetectionConditionText value(s) identified"
## [1] "No new DetectionQuantitationLimitTypeName value(s) identified."
## [1] "Translation workbook updated & saved."
8.1.2 Determine detection conditions and fill NDs
merged_results_filled=fillMaskedValues(results=irdata$merged_results, detquantlim=irdata$detquantlim, translation_wb=translation_wb,
detLimitTypeTable_sheetname="detLimitTypeTable", detLimitTypeTable_startRow=2,
unitConvTable_sheetname="unitConvTable", unitConvTable_startRow=1, unitConvTable_startCol=1,
lql_fac=0.5, uql_fac=1)
## [1] "Checking for disparities in result units and limit units..."
## [1] "Unit conversion(s) needed between detection limit unit(s) and result unit(s). Checking for new unit conversions..."
## [1] "No new unit combinations detected. Proceeding to unit conversion..."
## Warning in fillMaskedValues(results = irdata$merged_results, detquantlim =
## irdata$detquantlim, : FYI: There are 33 records with both upper and lower
## quantitation limits and is.na(result values). These records have been
## assigned as 'ND's
## [1] "Detection condition counts:"
##
## DET ND NRV OD
## 717579 229705 14567 808
8.1.3 Update lab/activity & media tables
updateLabActMediaTables(merged_results_filled, translation_wb=translation_wb, labNameActivityTable_startRow = 2)
## [1] "labNameActivityTable updated. 0 new lab/activity combinations identified."
## [1] "activityMediaNameTable updated. 0 new media/submedia identified."
## [1] "Translation workbook updated & saved."
8.1.4 Apply screening tables
mrf_screened=applyScreenTable(merged_results_filled,wb=translation_wb,
sheetname="sites",startRow=2, flag_col_name="IR_Site_FLAG", com_col_name="IR_Site_COMMENT")
mrf_screened=applyScreenTable(mrf_screened,wb=translation_wb,
sheetname="detConditionTable",startRow=2, flag_col_name="IR_DetCond_FLAG", com_col_name="IR_DetCond_COMMENT")
mrf_screened=applyScreenTable(mrf_screened,wb=translation_wb,
sheetname="labNameActivityTable",startRow=2,flag_col_name="IR_LabAct_FLAG", com_col_name="IR_LabAct_COMMENT")
mrf_screened=applyScreenTable(mrf_screened,wb=translation_wb,
sheetname="activityMediaNameTable",startRow=1, flag_col_name="IR_Media_FLAG", com_col_name="IR_Media_COMMENT")
8.1.5 Subset data to desired flag types
mrf_sub=subset(mrf_screened,
IR_DetCond_FLAG=="ACCEPT" &
IR_LabAct_FLAG=="ACCEPT" &
IR_Media_FLAG=="ACCEPT" &
IR_Site_FLAG =="ACCEPT")
8.2 Data prep
Generating assessable dataset
- Assigning criteria (by parameter & use via criteria table)
- Convert to consistent units (target units defined by criteria)
- Check that data fractions & activities match assessment targets
- Aggregate to daily values
- Generate correction factors & calculate criteria
- Split data into assessment groups
8.2.1 Update parameter translation tables
updateParamTrans(data=mrf_sub, detquantlim=detquantlim, translation_wb="00-lookup-tables\\ir_translation_workbook_live.xlsx")
8.2.2 Apply parameter translation table
mrf_sub_translated=applyScreenTable(mrf_sub,wb=translation_wb,
sheetname="paramTransTable",startRow=4,flag_col_name="IR_Parameter_FLAG",com_col_name="IR_Parameter_COMMENT",
na_dup_err=F)
## Warning in applyScreenTable(mrf_sub, wb = translation_wb, sheetname =
## "paramTransTable", : NA's generated in IR_Parameter_FLAG. This can occur
## for two reasons: (1) Combinations exist in merged_results that do not exist
## in paramTransTable, or (2) if na_dup_err=FALSE, NA's are likely present
## in domain table IR_FLAG. After making changes in domain table, re-run all
## applyScreenTables on a post-fillMaskedValues object to ensure changes are
## correctly merged.
8.2.3 Subset data to ACCEPT parameters
accept_data=subset(mrf_sub_translated, IR_Parameter_FLAG=="ACCEPT")
Parameter | Count |
---|---|
Aluminum | 7847 |
Ammonia | 36 |
Ammonia-nitrogen | 14080 |
Arsenic | 8174 |
Cadmium | 7839 |
Calcium | 13256 |
Depth, data-logger (ported) | 20838 |
Dissolved oxygen (DO) | 52158 |
Escherichia coli | 11613 |
Magnesium | 13256 |
pH | 43921 |
Temperature, water | 48807 |
Total dissolved solids | 16474 |
8.2.4 Criteria & unit assigments
8.2.4.1 Assign criteria
data_crit=assignCriteria(accept_data, crit_wb="00-lookup-tables\\IR_uses_standards.xlsx",
crit_sheetname="criteria", ss_sheetname="ss_criteria", crit_startRow=1, ss_startRow=1, rm_nocrit=FALSE, print=FALSE)
8.2.4.2 Update unit conversion table
updateUnitConvTable(data_crit, translation_wb, sheetname = "unitConvTable")
## [1] "Translation workbook updated & saved."
## [1] "No new result-criterion unit combinations identified."
8.2.5 Final data prep step
prepped_data=dataPrep(data=data_crit, translation_wb=translation_wb, split_agg_tds=TRUE, crit_wb="00-lookup-tables\\IR_uses_standards.xlsx",
unit_sheetname = "unitConvTable", startRow_unit = 1, cf_formulas_sheetname="cf_formulas", startRow_formulas=1)
## Warning in dataPrep(data = data_crit, translation_wb = translation_wb,
## split_agg_tds = TRUE, : 580976 records being removed due to lack of
## criteria & units in standards table. These may have been purposely passed
## through in assign criteria.
##
## Non-assessed activity type for parameter
## 23273
##
## No result value or detection limit
## 845
## Non-assessed activity type for parameter
## 23273
##
## No result value or detection limit
## 845
## Non-assessed activity type for parameter
## 23273
## Non-assessed fraction or fraction not defined, & fraction specified by criterion
## 4296
##
## Dissolved fraction result (-5%) > total fraction result (+5%)
## 126
## No result value or detection limit
## 845
## Non-assessed activity type for parameter
## 23273
## Non-assessed fraction or fraction not defined, & fraction specified by criterion
## 4296
##
## Dissolved fraction result (-5%) > total fraction result (+5%)
## 126
## Inappropriate units for conversion or assessment
## 21224
## No result value or detection limit
## 845
## Non-assessed activity type for parameter
## 23273
## Non-assessed fraction or fraction not defined, & fraction specified by criterion
## 4296
##
## Dissolved fraction result (-5%) > total fraction result (+5%)
## 126
## Inappropriate units for conversion or assessment
## 21224
## No result value or detection limit
## 845
## Non-assessed activity type for parameter
## 23273
## Non-assessed fraction or fraction not defined, & fraction specified by criterion
## 4296
## Non-detect result with detection limit > criterion.
## 9906
## [1] "Data prep record ACCEPT/REJECT counts:"
##
## ACCEPT REJECT
## 660726 74360
attach(prepped_data)
## The following objects are masked from prepped_data (pos = 4):
##
## accepted_data, agg_tds, conventionals, data_flags, ecoli,
## flag_reasons, lake_profiles, lakes_tds, lakes_trophic, toxics
8.2.6 Write processed data
data_list=prepped_data[c('agg_tds','conventionals','ecoli','lake_profiles','lakes_tds','lakes_trophic','toxics')]
merged_data=plyr::rbind.fill(data_list)
criteria=unique(merged_data[,c('ActivityStartDate','IR_MLID','IR_MLNAME','BeneficialUse','R3172ParameterName','CriterionUnits','TargetFraction','CriterionLabel','CriterionType','AsmntAggFun','NumericCriterion')])
merged_data=unique(merged_data[,c('ActivityStartDate','IR_MLID','IR_MLNAME','ASSESS_ID','AU_NAME','AU_Type','BEN_CLASS','R3172ParameterName','IR_Value','IR_Unit','IR_DetCond','IR_Fraction','IR_ActivityType','IR_Lat','IR_Long','DataLoggerLine','ActivityRelativeDepthName',
'ActivityDepthHeightMeasure.MeasureValue','R317Descrp','ActivityDepthHeightMeasure.MeasureUnitCode','cf_max_pH_pH units','cf_max_Temperature, water_deg C','cf_min_Calcium_mg/l','cf_min_Magnesium_mg/l','cf_min_pH_pH units',
'hardness')])
save(merged_data, criteria, file='03-processed-data/prepped_merged_data.Rdata')