diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index d9acd2a48..62a8dbac6 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -2,8 +2,7 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: - [develop] + branches: [develop] pull_request: branches: develop # allow manual action trigger diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 67b85cdd7..26a653ab3 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -2,8 +2,7 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: - [develop] + branches: [develop] pull_request: branches: [develop] diff --git a/R/ATTAINSCrosswalks.R b/R/ATTAINSCrosswalks.R index 636752910..ed59cb7ec 100644 --- a/R/ATTAINSCrosswalks.R +++ b/R/ATTAINSCrosswalks.R @@ -1257,10 +1257,10 @@ TADA_CreateParamRef <- function(.data, org_id = NULL, paramRef = NULL, auto_assi # Excel ref files to be stored in the Downloads folder location. # Define the OneDrive Downloads path onedrive_downloads_path <- file.path(Sys.getenv("USERPROFILE"), "OneDrive", "Downloads", "myfileRef.xlsx") - + # Define the default Downloads path default_downloads_path <- file.path(Sys.getenv("USERPROFILE"), "Downloads", "myfileRef.xlsx") - + # Check if the OneDrive Downloads path exists, and prioritize it if (file.exists(onedrive_downloads_path)) { downloads_path <- onedrive_downloads_path @@ -1706,7 +1706,7 @@ TADA_CreateUseParamRef <- function(.data, org_id = NULL, paramRef = NULL, usePar if (is.data.frame(useParamRef)) { col.names <- c( - "ATTAINS.OrganizationIdentifier", + "ATTAINS.OrganizationIdentifier", "ATTAINS.ParameterName", "ATTAINS.UseName" ) @@ -1940,7 +1940,7 @@ TADA_CreateUseParamRef <- function(.data, org_id = NULL, paramRef = NULL, usePar !paste(ATTAINS.OrganizationIdentifier, ATTAINS.ParameterName, ATTAINS.UseName) %in% paste(ATTAINS_param_all$ATTAINS.OrganizationIdentifier, ATTAINS_param_all$ATTAINS.ParameterName, ATTAINS_param_all$ATTAINS.UseName) & ATTAINS.UseName %in% ATTAINS_param_all$ATTAINS.UseName ~ "Use name is listed as a prior cause in ATTAINS for this organization, but not for this parameter name.", - TRUE ~ + TRUE ~ "Use name is not listed as a prior cause in ATTAINS." )) %>% dplyr::mutate(Flag.UseInput = dplyr::case_when( @@ -1970,10 +1970,10 @@ TADA_CreateUseParamRef <- function(.data, org_id = NULL, paramRef = NULL, usePar # Define the OneDrive Downloads path onedrive_downloads_path <- file.path(Sys.getenv("USERPROFILE"), "OneDrive", "Downloads", "myfileRef.xlsx") - + # Define the default Downloads path default_downloads_path <- file.path(Sys.getenv("USERPROFILE"), "Downloads", "myfileRef.xlsx") - + # Check if the OneDrive Downloads path exists, and prioritize it if (file.exists(onedrive_downloads_path)) { downloads_path <- onedrive_downloads_path @@ -2859,10 +2859,10 @@ TADA_CreateMLSummaryRef <- function(.data, org_id = NULL, useParamRef = NULL, di # default Downloads file location. 
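The hunks in this file (and the later ones in R/CriteriaMethods.R) repeat the same Downloads-path resolution: the OneDrive Downloads location is checked first and used when the reference file already exists there, otherwise the standard Downloads folder is used. A minimal standalone sketch of that logic follows, assuming a Windows USERPROFILE environment variable; the helper name resolve_downloads_path is illustrative only and is not part of the package.

# Sketch only: resolve the Excel export path the way the functions in this
# diff do, preferring the OneDrive Downloads copy when it already exists.
resolve_downloads_path <- function(file_name = "myfileRef.xlsx") {
  onedrive <- file.path(Sys.getenv("USERPROFILE"), "OneDrive", "Downloads", file_name)
  default <- file.path(Sys.getenv("USERPROFILE"), "Downloads", file_name)
  if (file.exists(onedrive)) onedrive else default
}

A single helper along these lines would also remove the duplication that these whitespace-only hunks keep touching, though the diff itself leaves the inline copies in place.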
# Define the OneDrive Downloads path onedrive_downloads_path <- file.path(Sys.getenv("USERPROFILE"), "OneDrive", "Downloads", "myfileRef.xlsx") - + # Define the default Downloads path default_downloads_path <- file.path(Sys.getenv("USERPROFILE"), "Downloads", "myfileRef.xlsx") - + # Check if the OneDrive Downloads path exists, and prioritize it if (file.exists(onedrive_downloads_path)) { downloads_path <- onedrive_downloads_path @@ -2968,7 +2968,7 @@ TADA_CreateMLSummaryRef <- function(.data, org_id = NULL, useParamRef = NULL, di LongitudeMeasure, LatitudeMeasure, IncludeOrExclude, UniqueSpatialCriteria ) %>% dplyr::distinct() - + # data frame to only display sites that contains the parameter CreateMLSummaryRef2 <- useParamRef %>% tidyr::uncount(weights = length(unique_ML)) %>% @@ -2988,7 +2988,7 @@ TADA_CreateMLSummaryRef <- function(.data, org_id = NULL, useParamRef = NULL, di LongitudeMeasure, LatitudeMeasure, TADA.ParameterInSite.Flag, IncludeOrExclude, UniqueSpatialCriteria ) %>% dplyr::distinct() - + # joins the table back together and flag appropriately CreateMLSummaryRef <- CreateMLSummaryRef %>% # dplyr::bind_rows(CreateMLSummaryRef2) @@ -3008,17 +3008,17 @@ TADA_CreateMLSummaryRef <- function(.data, org_id = NULL, useParamRef = NULL, di ) %>% dplyr::arrange(MonitoringLocationIdentifier) } - + if (displayNA == TRUE && nrow(useParamRef) > 2000 || length(unique_ML) > 2000) { warning(paste0( "displayNA = TRUE: ", "Too many sites or uses and parameters. Cannot assign all uses and parameters to each monitoring sites in the output. ", "Defaulting to displayNA = FALSE" )) - - displayNA = FALSE + + displayNA <- FALSE } - + # If we want to exclude rows of sites with no specified parameters if (displayNA == FALSE) { print(paste0( @@ -3044,7 +3044,7 @@ TADA_CreateMLSummaryRef <- function(.data, org_id = NULL, useParamRef = NULL, di LongitudeMeasure, LatitudeMeasure, TADA.ParameterInSite.Flag, IncludeOrExclude, UniqueSpatialCriteria ) %>% dplyr::distinct() - + CreateMLSummaryRef <- CreateMLSummaryRef2 %>% dplyr::arrange(MonitoringLocationIdentifier) } diff --git a/R/CensoredDataSuite.R b/R/CensoredDataSuite.R index 1d6686828..d4fd2919f 100644 --- a/R/CensoredDataSuite.R +++ b/R/CensoredDataSuite.R @@ -40,22 +40,19 @@ #' #' TADA_IDCensoredData <- function(.data) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - - # Check if the input data frame is empty - if (nrow(.data) == 0) { - message("The entered data frame is empty. The function will not run.") - return(NULL) # Exit the function early - } - - # check .data has all of the required columns + # check .data is data.frame and has required columns expected_cols <- c( "ResultDetectionConditionText", "DetectionQuantitationLimitTypeName", "TADA.ResultMeasureValueDataTypes.Flag" ) TADA_CheckColumns(.data, expected_cols) + # Check if the input data frame is empty + if (nrow(.data) == 0) { + message("The entered data frame is empty. 
The function will not run.") + return(NULL) # Exit the function early + } + # this copies det lim result value and unit over to TADA result value and unit # when the result value is TEXT but there is a specific text value that indicates @@ -398,16 +395,7 @@ TADA_SimpleCensoredMethods <- function(.data, nd_method = "multiplier", nd_multiplier = 0.5, od_method = "as-is", od_multiplier = "null") { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - - # Check if the input data frame is empty - if (nrow(.data) == 0) { - message("The entered data frame is empty. The function will not run.") - return(NULL) # Exit the function early - } - - # check .data has all of the required columns + # check .data is data.frame and has required columns expected_cols <- c( "ResultDetectionConditionText", "DetectionQuantitationLimitTypeName", @@ -415,6 +403,12 @@ TADA_SimpleCensoredMethods <- function(.data, nd_method = "multiplier", ) TADA_CheckColumns(.data, expected_cols) + # Check if the input data frame is empty + if (nrow(.data) == 0) { + message("The entered data frame is empty. The function will not run.") + return(NULL) # Exit the function early + } + # check that multiplier is provided if method = "multiplier" if (nd_method == "multiplier" & nd_multiplier == "null") { stop("Please provide a multiplier for the lower detection limit handling method of 'multiplier'. Typically, the multiplier value is between 0 and 1.") diff --git a/R/CriteriaMethods.R b/R/CriteriaMethods.R index 5b02e1170..6f4d8aa11 100644 --- a/R/CriteriaMethods.R +++ b/R/CriteriaMethods.R @@ -2,13 +2,13 @@ #' #' Assessment criteria and methodologies used to evaluate water quality vary #' across the country. TADA users can fill out this template to define the specific -#' criteria and methodologies for each parameter and use combination they are -#' interested in analyzing. This table can be filled out manually, auto-populated -#' with uses and parameters from ATTAINS and the input WQP dataframe, or -#' developed with TADA helper functions (recommended).It is recommended to run -#' these three TADA helper functions, [TADA_CreateParamRef()], -#' [TADA_CreateUseParamRef], and [TADA_CreateMLSummaryRef], in that order to -#' generate the Criteria and Methodology table specific for your organization. +#' criteria and methodologies for each parameter and use combination they are +#' interested in analyzing. This table can be filled out manually, auto-populated +#' with uses and parameters from ATTAINS and the input WQP dataframe, or +#' developed with TADA helper functions (recommended).It is recommended to run +#' these three TADA helper functions, [TADA_CreateParamRef()], +#' [TADA_CreateUseParamRef], and [TADA_CreateMLSummaryRef], in that order to +#' generate the Criteria and Methodology table specific for your organization. #' #' This criteria and methodology table will be in a TADA compatible format and #' contain a list of allowable values within each column to define the full @@ -58,21 +58,21 @@ #' should contain a completed crosswalk of use names associated with each assessment unit. #' Users will need to ensure this crosswalk contains the appropriate column names in #' order to run the function. -#' +#' #' @param auto_assign Boolean argument with two possible values: TRUE and FALSE. #' The default value is FALSE. 
If TRUE, a draft criteria and methods table is -#' generated using default function inputs for [TADA_CreateParamRef()], +#' generated using default function inputs for [TADA_CreateParamRef()], #' [TADA_CreateUseParamRef], and [TADA_CreateMLSummaryRef]. .data and org_id are -#' required inputs for this function if auto_assign = TRUE. It is also -#' recommended to set excel = TRUE when auto_assign = TRUE. The criteria -#' and methodology template should be reviewed carefully and edits can be +#' required inputs for this function if auto_assign = TRUE. It is also +#' recommended to set excel = TRUE when auto_assign = TRUE. The criteria +#' and methodology template should be reviewed carefully and edits can be #' made manually in Excel. When your review is complete, read the file back into -#' R and re-run this function, TADA_DefineCriteriaMethodology, again. This time, +#' R and re-run this function, TADA_DefineCriteriaMethodology, again. This time, #' use the criteriaMethods function input to specify the criteria and methodology #' table that has already been filled out. -#' -#' @param displayUniqueId A Boolean value. If TRUE, this will print all unique -#' TADA.ComparableDataIdentifier in the criteria and methods table output. This is +#' +#' @param displayUniqueId A Boolean value. If TRUE, this will print all unique +#' TADA.ComparableDataIdentifier in the criteria and methods table output. This is #' useful in the alternative options to generate the criteria and methods table #' without the reference tables. #' @@ -132,24 +132,24 @@ #' ) #' } #' -TADA_DefineCriteriaMethodology <- function(.data, - MLSummaryRef = NULL, +TADA_DefineCriteriaMethodology <- function(.data, + MLSummaryRef = NULL, org_id = NULL, # required inputs for the recommended workflow - criteriaMethods = NULL, + criteriaMethods = NULL, auto_assign = FALSE, # ref = c("ATTAINS", "CST", "TADA", "Other") future development to consider additional crosswalk alternatives? - AUMLRef = NULL, + AUMLRef = NULL, useAURef = NULL, # Optional if auto_assign = TRUE - epa304a = FALSE, - displayUniqueId = FALSE, - excel = TRUE, + epa304a = FALSE, + displayUniqueId = FALSE, + excel = TRUE, overwrite = FALSE) { # Excel ref files to be stored in the Downloads folder location. # Define the OneDrive Downloads path onedrive_downloads_path <- file.path(Sys.getenv("USERPROFILE"), "OneDrive", "Downloads", "myfileRef.xlsx") - + # Define the default Downloads path default_downloads_path <- file.path(Sys.getenv("USERPROFILE"), "Downloads", "myfileRef.xlsx") - + # Check if the OneDrive Downloads path exists, and prioritize it if (file.exists(onedrive_downloads_path)) { downloads_path <- onedrive_downloads_path @@ -171,10 +171,10 @@ TADA_DefineCriteriaMethodology <- function(.data, # } # If user supplies criteria methods table, then auto_assign = T for any non-matched values - if ( !is.null(criteriaMethods)) { - auto_assign = TRUE - } - + if (!is.null(criteriaMethods)) { + auto_assign <- TRUE + } + # Invalid function input combos - supply one or the other. # if ( !is.null(MLSummaryRef) && !is.null(criteriaMethods) ) { # stop("TADA_DefineCriteriaMethodology: MLSummaryRef and criteriaMethods are both provided. 
You can only proceed with one (or none) of these options provided.") @@ -190,7 +190,7 @@ TADA_DefineCriteriaMethodology <- function(.data, if (auto_assign == FALSE && is.null(MLSummaryRef)) { desired_cols <- c( "ATTAINS.OrganizationIdentifier", "ATTAINS.ParameterName", "ATTAINS.UseName", "TADA.ComparableDataIdentifier", - "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", + "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", # Spatial Columns "ATTAINS.WaterType", "SaltFresh", "DepthCategory", "UniqueSpatialCriteria", # Criteria Columns @@ -209,11 +209,11 @@ TADA_DefineCriteriaMethodology <- function(.data, cols_to_convert <- c( "ATTAINS.OrganizationIdentifier", "ATTAINS.ParameterName", "ATTAINS.UseName", "TADA.ComparableDataIdentifier", - "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", + "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", # Spatial Columns "ATTAINS.WaterType", "SaltFresh", "DepthCategory", "UniqueSpatialCriteria", # Criteria Columns - "AcuteChronic", "EquationBased", + "AcuteChronic", "EquationBased", # Data Sufficiency Columns "AssessPeriod", "Season", "DistrPeriod" ) @@ -301,14 +301,14 @@ TADA_DefineCriteriaMethodology <- function(.data, # Will include all unique TADA Char/ComparableDataIdentifier to be shown in the criteria table MLSummaryRef <- TADA_param %>% dplyr::left_join(MLSummaryRef) - #} + # } # # Commenting out all code related to updateRef for now. See https://github.com/USEPA/EPATADA/issues/667 # # user only updates paramRef. This will update paramRef, useParamRef, and MLSummaryRef based on these modifications. # if (updateRef == "paramRef") { # message(paste0("auto_assign = TRUE and updateRef = paramRef selected. Running TADA_CreateParamRef with use supplied paramRef assignment. Please review this paramRef table output.")) # myfile_ParamRef <- openxlsx::read.xlsx(downloads_path, sheet = "CreateParamRef") - # + # # TADA_ParamRef <- TADA_CreateParamRef( # .data, # org_id = org_id, @@ -316,7 +316,7 @@ TADA_DefineCriteriaMethodology <- function(.data, # auto_assign = "None", # User has now edited the table, turn the auto_assign of in TADA_CreateParamRef # excel = excel, overwrite = overwrite # You must include overwrite = TRUE to overwrite the excel file when you first create the excel spreadsheet. # ) - # + # # TADA_UseParamRef <- TADA_CreateUseParamRef( # .data, # org_id = org_id, @@ -324,7 +324,7 @@ TADA_DefineCriteriaMethodology <- function(.data, # auto_assign = TRUE, # excel = excel, overwrite = overwrite # You must include overwrite = TRUE to overwrite the excel file when you first create the excel spreadsheet. # ) - # + # # MLSummaryRef <- TADA_CreateMLSummaryRef( # .data, # org_id = org_id, @@ -333,12 +333,12 @@ TADA_DefineCriteriaMethodology <- function(.data, # excel = excel, overwrite = overwrite # You must include overwrite = TRUE to overwrite the excel file when you first create the excel spreadsheet. # ) # } - # + # # # user only updates useParamRef. This will update useParamRef, MLSummaryRef based on this modifications. # if (updateRef == "useParamRef") { # message(paste0("auto_assign = TRUE and updateRef = useParamRef selected. Running TADA_CreateParamRef with use supplied paramRef assignment. 
Please review this paramRef table output.")) # myfile_UseParamRef <- openxlsx::read.xlsx(downloads_path, sheet = "CreateUseParamRef") - # + # # TADA_ParamRef <- TADA_CreateParamRef( # .data, # org_id = org_id, @@ -346,7 +346,7 @@ TADA_DefineCriteriaMethodology <- function(.data, # auto_assign = "All", # excel = excel, overwrite = overwrite # You must include overwrite = TRUE to overwrite the excel file when you first create the excel spreadsheet. # ) - # + # # TADA_UseParamRef <- TADA_CreateUseParamRef( # .data, # org_id = org_id, @@ -355,7 +355,7 @@ TADA_DefineCriteriaMethodology <- function(.data, # auto_assign = TRUE, # excel = excel, overwrite = overwrite # You must include overwrite = TRUE to overwrite the excel file when you first create the excel spreadsheet. # ) - # + # # MLSummaryRef <- TADA_CreateMLSummaryRef( # .data, # org_id = org_id, @@ -364,12 +364,12 @@ TADA_DefineCriteriaMethodology <- function(.data, # excel = excel, overwrite = overwrite # You must include overwrite = TRUE to overwrite the excel file when you first create the excel spreadsheet. # ) # } - # + # # # user only updates MLSummaryRef in excel. This will update MLSummaryRef based on this modifications. # if (updateRef == "MLSummaryRef") { # message(paste0("auto_assign = TRUE and updateRef = MLSummaryRef selected. Running TADA_CreateMLSummaryRef with use supplied paramRef assignment. Please review this paramRef table output.")) # myfile_MLSummaryRef <- openxlsx::read.xlsx(downloads_path, sheet = "CreateMLSummaryRef") - # + # # TADA_ParamRef <- TADA_CreateParamRef( # .data, # org_id = org_id, @@ -377,7 +377,7 @@ TADA_DefineCriteriaMethodology <- function(.data, # auto_assign = "All", # excel = excel, overwrite = overwrite # You must include overwrite = TRUE to overwrite the excel file when you first create the excel spreadsheet. # ) - # + # # TADA_UseParamRef <- TADA_CreateUseParamRef( # .data, # org_id = org_id, @@ -386,7 +386,7 @@ TADA_DefineCriteriaMethodology <- function(.data, # auto_assign = TRUE, # excel = excel, overwrite = overwrite # You must include overwrite = TRUE to overwrite the excel file when you first create the excel spreadsheet. 
# ) - # + # # MLSummaryRef <- TADA_CreateMLSummaryRef( # .data, # org_id = org_id, @@ -489,10 +489,10 @@ TADA_DefineCriteriaMethodology <- function(.data, DistrCount = as.numeric(NA), DistrPeriod = as.character(NA), DistrMinSample = as.numeric(NA), Notes = as.character(NA) ) ) %>% - #tidyr::drop_na(ATTAINS.ParameterName) %>% + # tidyr::drop_na(ATTAINS.ParameterName) %>% dplyr::select( "ATTAINS.OrganizationIdentifier", "ATTAINS.ParameterName", "ATTAINS.UseName", "TADA.ComparableDataIdentifier", - "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", + "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", # Spatial Columns "ATTAINS.WaterType", "SaltFresh", "DepthCategory", "UniqueSpatialCriteria", # Criteria Columns @@ -509,11 +509,11 @@ TADA_DefineCriteriaMethodology <- function(.data, col_names_MLSummary <- c( "ATTAINS.OrganizationIdentifier", "ATTAINS.ParameterName", "ATTAINS.UseName", "TADA.ComparableDataIdentifier", - "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", + "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", # Spatial Columns "ATTAINS.WaterType", "SaltFresh", "DepthCategory", "UniqueSpatialCriteria", # Criteria Columns - "AcuteChronic", "EquationBased", + "AcuteChronic", "EquationBased", # Data Sufficiency Columns "AssessPeriod", "Season", "DistrPeriod" ) @@ -523,12 +523,12 @@ TADA_DefineCriteriaMethodology <- function(.data, # User wants to populate the criteria table using a user supplied table. # This option will prioritize a user-supplied table, but will include - # all rows for any missing WQP Characteristic (or TADA.ComparableDataIdenftifier) + # all rows for any missing WQP Characteristic (or TADA.ComparableDataIdenftifier) # generated from the auto_assign default values. Users may also append epa 304a values. if (!is.null(criteriaMethods)) { desired_cols <- c( "ATTAINS.OrganizationIdentifier", "ATTAINS.ParameterName", "ATTAINS.UseName", "TADA.ComparableDataIdentifier", - "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", + "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", # Spatial Columns "ATTAINS.WaterType", "SaltFresh", "DepthCategory", "UniqueSpatialCriteria", # Criteria Columns @@ -619,9 +619,9 @@ TADA_DefineCriteriaMethodology <- function(.data, "You selected auto_assign == TRUE. A useAURef was provided. ", "Filling in these blanks with ATTAINS.ParameterName and ATTAINS.UseName pulled in from your useAURef. ", "Please review or edit these entries in your crosswalk or remove them/leave them unfilled if not applicable to analysis." - )) + )) } - + # From the user supplied criteriaMethods, fill in any values from the pre-filled MLSummaryRef template generated. 
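The comment above describes filling user-supplied criteriaMethods values into the pre-filled, MLSummaryRef-based template, and the definedCriteria pipeline that follows starts by dropping rows without an ATTAINS.ParameterName. As general context only, the same patch-the-template idea can be expressed with dplyr::rows_patch; the toy data frames, values, and key columns below are assumptions for illustration, not the package's actual implementation.

# Sketch only (not the package's implementation): patch NA fields in a
# pre-filled template with user-supplied values, matching rows on shared
# key columns. Column names and toy values are assumed for illustration.
library(dplyr)

template_ref <- tibble::tibble(
  ATTAINS.ParameterName = c("AMMONIA", "PH"),
  ATTAINS.UseName       = c("Aquatic Life", "Aquatic Life"),
  MagnitudeValueUpper   = c(NA_real_, NA_real_)
)
user_ref <- tibble::tibble(
  ATTAINS.ParameterName = "PH",
  ATTAINS.UseName       = "Aquatic Life",
  MagnitudeValueUpper   = 9
)

# NA entries in template_ref are filled from user_ref for matching rows
rows_patch(template_ref, user_ref,
  by = c("ATTAINS.ParameterName", "ATTAINS.UseName")
)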
definedCriteria <- criteriaMethods %>% dplyr::filter(!is.na(ATTAINS.ParameterName)) %>% @@ -646,7 +646,7 @@ TADA_DefineCriteriaMethodology <- function(.data, } } ) - + # If MLSummaryRef does not get generated, and only a user supplied criteriaMethods table is provided if (nrow(DefineCriteriaMethodology) == 0 && auto_assign == FALSE) { DefineCriteriaMethodology <- criteriaMethods %>% @@ -731,7 +731,7 @@ TADA_DefineCriteriaMethodology <- function(.data, # Generates the excel function (HIGHLY Recommended for users to export) if (excel == TRUE) { wb <- openxlsx::loadWorkbook(wb, downloads_path) - + tryCatch( { openxlsx::addWorksheet(wb, "DefineCriteriaMethodology") @@ -745,10 +745,10 @@ TADA_DefineCriteriaMethodology <- function(.data, openxlsx::addWorksheet(wb, "Index-Criteria", visible = FALSE) } ) - + # IMPORTANT: Set the "DefineCriteriaMethodology" sheet as the active sheet - openxlsx::activeSheet(wb) <- "DefineCriteriaMethodology" - + openxlsx::activeSheet(wb) <- "DefineCriteriaMethodology" + # Set visibility names(wb) openxlsx::sheetVisibility(wb)[1] <- "hidden" @@ -770,7 +770,7 @@ TADA_DefineCriteriaMethodology <- function(.data, columns <- c( "ATTAINS.OrganizationIdentifier", "ATTAINS.ParameterName", "ATTAINS.UseName", "TADA.ComparableDataIdentifier", - "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", + "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", # Spatial Columns "ATTAINS.WaterType", "SaltFresh", "DepthCategory", "UniqueSpatialCriteria", # Criteria Columns @@ -980,7 +980,7 @@ TADA_DefineCriteriaMethodology <- function(.data, } TADA_CriteriaDataDictionary() - + cat("File saved to:", gsub("/", "\\\\", downloads_path), "\n") } @@ -999,21 +999,20 @@ TADA_DefineCriteriaMethodology <- function(.data, #' @export #' TADA_CriteriaDataDictionary <- function() { - # Excel ref files to be stored in the Downloads folder location. 
# Define the OneDrive Downloads path onedrive_downloads_path <- file.path(Sys.getenv("USERPROFILE"), "OneDrive", "Downloads", "myfileRef.xlsx") - + # Define the default Downloads path default_downloads_path <- file.path(Sys.getenv("USERPROFILE"), "Downloads", "myfileRef.xlsx") - + # Check if the OneDrive Downloads path exists, and prioritize it if (file.exists(onedrive_downloads_path)) { downloads_path <- onedrive_downloads_path } else { downloads_path <- default_downloads_path } - + wb <- openxlsx::loadWorkbook(wb, downloads_path) tryCatch( { @@ -1024,38 +1023,38 @@ TADA_CriteriaDataDictionary <- function() { openxlsx::addWorksheet(wb, "DataDictionary") } ) - + # Example data frame data_to_write <- data.frame( ColumnName = c( - "ATTAINS.OrganizationIdentifier", "ATTAINS.ParameterName", - "ATTAINS.UseName", "TADA.ComparableDataIdentifier", "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", - "ATTAINS.WaterType", "SaltFresh", "DepthCategory", "UniqueSpatialCriteria", - "AcuteChronic", "EquationBased", "MagnitudeValueLower", "MagnitudeValueUpper", "MagnitudeUnit", - "DurationValue", "DurationUnit", "DurationMethod", "FreqValue", "FreqMethod", - "AssessPeriod", "AssessPeriodStartDate", "AssessPeriodEndDate", - "Season", "SeasonStartDate", "SeasonEndDate", "DistrCount", "DistrPeriod", "DistrMinSample", "Notes" - ), + "ATTAINS.OrganizationIdentifier", "ATTAINS.ParameterName", + "ATTAINS.UseName", "TADA.ComparableDataIdentifier", "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName", + "ATTAINS.WaterType", "SaltFresh", "DepthCategory", "UniqueSpatialCriteria", + "AcuteChronic", "EquationBased", "MagnitudeValueLower", "MagnitudeValueUpper", "MagnitudeUnit", + "DurationValue", "DurationUnit", "DurationMethod", "FreqValue", "FreqMethod", + "AssessPeriod", "AssessPeriodStartDate", "AssessPeriodEndDate", + "Season", "SeasonStartDate", "SeasonEndDate", "DistrCount", "DistrPeriod", "DistrMinSample", "Notes" + ), Requirement = c( - "Required", "Required", "Required", "Recommended", "Required", - "Recommended", "Recommended", "Optional", "Optional", "Optional", "Optional", - "Optional", "Optional", "Required", "Required", "Required", "Optional", "Optional", - "Optional", "Optional", "Optional", "Optional", "Optional", "Optional", "Optional", - "Optional", "Optional", "Optional", "Optional", "Optional", "Optional" + "Required", "Required", "Required", "Recommended", "Required", + "Recommended", "Recommended", "Optional", "Optional", "Optional", "Optional", + "Optional", "Optional", "Required", "Required", "Required", "Optional", "Optional", + "Optional", "Optional", "Optional", "Optional", "Optional", "Optional", "Optional", + "Optional", "Optional", "Optional", "Optional", "Optional", "Optional" ), - Source= c( - "ATTAINS*", "ATTAINS*", "ATTAINS*", "TADA", "TADA", "TADA", "TADA", "User Supplied", - "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", - "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", - "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", - "User Supplied", "User Supplied", "User Supplied" - ), - ColumnType= c( - "Crosswalk", "Crosswalk", "Crosswalk", "Crosswalk", "Crosswalk", "Crosswalk", "Crosswalk", "Spatial", - "Spatial", "Spatial", "Spatial", "Spatial", "Criteria", "Criteria", "Criteria", "Criteria", - "Criteria", "Criteria", 
"Criteria", "Criteria", "Criteria", "Methodology", - "Methodology", "Methodology", "Methodology", "Methodology", "Methodology", "Methodology", - "Methodology", "Methodology", "Methodology" + Source = c( + "ATTAINS*", "ATTAINS*", "ATTAINS*", "TADA", "TADA", "TADA", "TADA", "User Supplied", + "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", + "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", + "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", "User Supplied", + "User Supplied", "User Supplied", "User Supplied" + ), + ColumnType = c( + "Crosswalk", "Crosswalk", "Crosswalk", "Crosswalk", "Crosswalk", "Crosswalk", "Crosswalk", "Spatial", + "Spatial", "Spatial", "Spatial", "Spatial", "Criteria", "Criteria", "Criteria", "Criteria", + "Criteria", "Criteria", "Criteria", "Criteria", "Criteria", "Methodology", + "Methodology", "Methodology", "Methodology", "Methodology", "Methodology", "Methodology", + "Methodology", "Methodology", "Methodology" ), Description = c( # ATTAINS.OrganizationIdentifier @@ -1067,13 +1066,13 @@ TADA_CriteriaDataDictionary <- function() { # TADA.ComparableDataIdentifier paste0( "To populate this field, specify displayUniqueId = TRUE. Concatenates the WQP Characteristic, Fraction and speciation into one string.", - "If provided, this will crosswalk an ATTAINS.ParameterName to this TADA.ComparableDataIdentifier. ", + "If provided, this will crosswalk an ATTAINS.ParameterName to this TADA.ComparableDataIdentifier. ", "It is recommended to have performed this crosswalk in TADA_CreateParamRef to avoid any duplicated ", - "definition of your organization's criteria if they are the same for multiple TADA.ComparableDataIdentifiers.", + "definition of your organization's criteria if they are the same for multiple TADA.ComparableDataIdentifiers.", collapse = " " - ), + ), # TADA.CharacteristicName - "Name of TADA characteristic in the WQP that gets matched to an ATTAINS parameter.", + "Name of TADA characteristic in the WQP that gets matched to an ATTAINS parameter.", # TADA.ResultSampleFractionText "If TADA.ComparableDataIdentifier is blank, this will group all TADA.CharacteristicName to an ATTAINS.ParameterName on the condition of the specified Fraction Type.", # TADA.MethodSpeciationName @@ -1083,55 +1082,55 @@ TADA_CriteriaDataDictionary <- function() { # SaltFresh "The salt or freshwater classification of the ATTAINS Waterbody Type. Users should specify if a standard only applies to salt or freshwater types.", # DepthCategory - "The depth within water column that a standard applies to if applicable. Users can run TADA.FlagDepthCategory to populate this entry (or can specify a specific unit measurement?).", + "The depth within water column that a standard applies to if applicable. Users can run TADA.FlagDepthCategory to populate this entry (or can specify a specific unit measurement?).", # UniqueSpatialCriteria - "Users should specify any monitoring location sites that may contain a unique spatial critieria for a parameter or use in CreateMLSummaryRef.", + "Users should specify any monitoring location sites that may contain a unique spatial critieria for a parameter or use in CreateMLSummaryRef.", # AcuteChronic - "If a parameter and use depends depends on differing criteria standards for acute or chronic conditions. 
Acute is defined as short term while chronic is long term.", + "If a parameter and use depends depends on differing criteria standards for acute or chronic conditions. Acute is defined as short term while chronic is long term.", # EquationBased - "If your water quality standards depend on an equation calculated numeric value, the equation column should be specified as yes. Users will need to specify either a custom equation or choose from a list of common equations and define each equation parameter appropriately. NOTE: Equation handling in TADA is still in development.", + "If your water quality standards depend on an equation calculated numeric value, the equation column should be specified as yes. Users will need to specify either a custom equation or choose from a list of common equations and define each equation parameter appropriately. NOTE: Equation handling in TADA is still in development.", # MagnitudeValueLower - "The lower limit of the amount or concentration of a pollutant or parameter that is allowable in water. An excursion or exceedance occurs if a ResultValue falls below this defined lower limit for this parameter and use.", + "The lower limit of the amount or concentration of a pollutant or parameter that is allowable in water. An excursion or exceedance occurs if a ResultValue falls below this defined lower limit for this parameter and use.", # MagnitudeValueUpper - "The upper limit of the amount or concentration of a pollutant or parameter that is allowable in water. An excursion or exceedance occurs if a ResultValue falls above this defined lower limit for this parameter and use.", + "The upper limit of the amount or concentration of a pollutant or parameter that is allowable in water. An excursion or exceedance occurs if a ResultValue falls above this defined lower limit for this parameter and use.", # MagnitudeUnit "Defines the units component of the amount or concentration of a pollutant or parameter that is allowable in water.", # DurationValue - "The numeric value component of the length of time in which a waterbody can be exposed to a magnitude of a parameter without negatively impacting its designated use.", + "The numeric value component of the length of time in which a waterbody can be exposed to a magnitude of a parameter without negatively impacting its designated use.", # DurationUnit - "The units component of the length of time in which a waterbody can be exposed to a magnitude of a parameter without negatively impacting its designated use.", + "The units component of the length of time in which a waterbody can be exposed to a magnitude of a parameter without negatively impacting its designated use.", # DurationMethod "The specific aggregation calculation of samples that are collected during a duration period.", # FreqValue - "The numeric value of how often a magnitude value can be exceeded before being considered impaired.", + "The numeric value of how often a magnitude value can be exceeded before being considered impaired.", # FreqMethod - "How often a magnitude value can be exceeded percentage or number of times a magnitude value can be exceeded over a specified duration period.", + "How often a magnitude value can be exceeded percentage or number of times a magnitude value can be exceeded over a specified duration period.", # AssessPeriod - "Labels the assessment period of which the WQP data must be collected from. 
Users should define the assessment date range in the beginning and end date columns that proceeds this one.", + "Labels the assessment period of which the WQP data must be collected from. Users should define the assessment date range in the beginning and end date columns that proceeds this one.", # AssessPeriodStartDate "The start date in which WQP data will be analysed for this parameter and use.", # AssessPeriodEndDate - "The end date in which WQP data will be analysed for this parameter and use.", + "The end date in which WQP data will be analysed for this parameter and use.", # Season - "Labels the season in which the standards apply for this parameter and use. Specify the start and end dates of your season in the proceeding two columns.", + "Labels the season in which the standards apply for this parameter and use. Specify the start and end dates of your season in the proceeding two columns.", # SeasonStartDate - "The start date of the season in which assessments are done for during a calendar year.", + "The start date of the season in which assessments are done for during a calendar year.", # SeasonEndDate - "The end date of the season in which assessments are done for during a calendar year.", + "The end date of the season in which assessments are done for during a calendar year.", # DistrCount - "A numeric value specifying the minimum number of sampling events (consecutive) over a distribution period.", + "A numeric value specifying the minimum number of sampling events (consecutive) over a distribution period.", # DistrPeriod - "The period of time in which samples must be collected during an assessment data window.", + "The period of time in which samples must be collected during an assessment data window.", # DistrMinSample - "How many samples must be collected during each specified DistrPeriod", + "How many samples must be collected during each specified DistrPeriod", # Notes "Additonal free form notes column for any notes that must be considered for this parameter and use that may not be able to be captured in the TADA criteria table format." 
- ) + ) ) - + # Write the data frame to the worksheet, starting at cell B2 openxlsx::writeData(wb, "DataDictionary", data_to_write, startCol = 2, startRow = 2) - + # Create a style for the header row header_style <- openxlsx::createStyle( fontSize = 12, @@ -1141,19 +1140,19 @@ TADA_CriteriaDataDictionary <- function() { border = "TopBottomLeftRight", borderColour = "#000000" ) - + # Apply the header style to the second row (header) openxlsx::addStyle(wb, "DataDictionary", header_style, rows = 2, cols = 2:(ncol(data_to_write) + 1), gridExpand = TRUE) - + # Create a style for borders on all data cells data_border_style <- openxlsx::createStyle( border = "TopBottomLeftRight", borderColour = "#000000" # Light grey border ) - + # Apply data border style to all data rows and columns besides header openxlsx::addStyle(wb, "DataDictionary", data_border_style, rows = 3:(nrow(data_to_write) + 2), cols = 2:(ncol(data_to_write) + 1), gridExpand = TRUE) - + # Define description text that gets wrapped wrapStyle <- openxlsx::createStyle( border = "TopBottomLeftRight", @@ -1165,12 +1164,10 @@ TADA_CriteriaDataDictionary <- function() { openxlsx::addStyle(wb, "DataDictionary", wrapStyle, rows = 3:(nrow(data_to_write) + 2), cols = ncol(data_to_write) + 1) openxlsx::setColWidths(wb, "DataDictionary", cols = ncol(data_to_write) + 1, widths = 80) # Adjust width as needed - + # Set column widths to automatically fit content, except last column openxlsx::setColWidths(wb, "DataDictionary", cols = 1:(ncol(data_to_write) - 1), widths = "auto") - + # Save the workbook to an Excel file openxlsx::saveWorkbook(wb, downloads_path, overwrite = T) - } - diff --git a/R/DepthProfile.R b/R/DepthProfile.R index 22321fef7..7d1211571 100644 --- a/R/DepthProfile.R +++ b/R/DepthProfile.R @@ -92,14 +92,8 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, surfacevalue = 2, dailyagg = "none", aggregatedonly = FALSE, clean = FALSE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - # check aggregatedonly is boolean - TADA_CheckType(aggregatedonly, "logical") - # check clean is boolean - TADA_CheckType(clean, "logical") - # check .data has required columns - TADA_CheckColumns(.data, c( + # check .data is data.frame and has required columns + expected_cols <- c( "TADA.ActivityDepthHeightMeasure.MeasureValue", "TADA.ResultDepthHeightMeasure.MeasureValue", "ActivityRelativeDepthName", @@ -111,7 +105,12 @@ TADA_FlagDepthCategory <- function(.data, bycategory = "no", bottomvalue = 2, "TADA.MonitoringLocationIdentifier", "OrganizationIdentifier", "ActivityStartDate" - )) + ) + TADA_CheckColumns(.data, expected_cols) + # check aggregatedonly is boolean + TADA_CheckType(aggregatedonly, "logical") + # check clean is boolean + TADA_CheckType(clean, "logical") # execute function after checks are passed @@ -878,7 +877,7 @@ TADA_DepthProfilePlot <- function(.data, rm(param.check) # list required columns - reqcols <- c( + required_cols <- c( "TADA.ResultDepthHeightMeasure.MeasureValue", "TADA.ResultDepthHeightMeasure.MeasureUnitCode", "TADA.ActivityDepthHeightMeasure.MeasureUnitCode", @@ -896,7 +895,7 @@ TADA_DepthProfilePlot <- function(.data, ) # check .data has required columns - TADA_CheckColumns(.data, reqcols) + TADA_CheckColumns(.data, required_cols) print("TADA_DepthProfilePlot: Identifying available depth profile data.") @@ -1028,7 +1027,7 @@ TADA_DepthProfilePlot <- function(.data, # this subset must include all fields included in plot hover below plot.data <- profile.data 
%>% dplyr::filter(dplyr::if_any(TADA.ComparableDataIdentifier, ~ .x %in% groups)) %>% - dplyr::select(dplyr::all_of(reqcols), "TADA.ComparableDataIdentifier", "ActivityStartDateTime", "TADA.MonitoringLocationName", "TADA.ActivityMediaName", "ActivityMediaSubdivisionName", "ActivityRelativeDepthName", "TADA.CharacteristicName", "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText") %>% + dplyr::select(dplyr::all_of(required_cols), "TADA.ComparableDataIdentifier", "ActivityStartDateTime", "TADA.MonitoringLocationName", "TADA.ActivityMediaName", "ActivityMediaSubdivisionName", "ActivityRelativeDepthName", "TADA.CharacteristicName", "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText") %>% dplyr::mutate(TADA.ResultMeasure.MeasureUnitCode = ifelse(is.na(TADA.ResultMeasure.MeasureUnitCode), "NA", TADA.ResultMeasure.MeasureUnitCode )) diff --git a/R/ExampleData.R b/R/ExampleData.R index 2018e6fd5..e31ffdb00 100644 --- a/R/ExampleData.R +++ b/R/ExampleData.R @@ -126,7 +126,7 @@ NULL #' Data_MT.UseAURef_Water #' -#' Generate Data_MT.UseAURef_Water used in ExampleMod2Workflow.Rmd and +#' Generate Data_MT.UseAURef_Water used in ExampleMod2Workflow.Rmd and #' ExampleMod3Workflow.Rmd #' #' @docType data diff --git a/R/Figures.R b/R/Figures.R index a4b3a5143..cd437bc7f 100644 --- a/R/Figures.R +++ b/R/Figures.R @@ -60,9 +60,6 @@ #' Boxplot_output[[30]] #' TADA_Boxplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - # ensure comparable data identifier is in the id_cols vector if (is.null(id_cols)) { id_cols <- "TADA.ComparableDataIdentifier" @@ -71,14 +68,9 @@ TADA_Boxplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")) { warning("TADA.ComparableDataIdentifier not found in id_cols argument and is highly recommended: plotting without it may produce errors in the plot.") } - # check .data has required columns - TADA_CheckColumns(.data, id_cols) - - # check .data has required columns - TADA_CheckColumns(.data, c( - "TADA.ResultMeasureValue", - "TADA.ResultMeasure.MeasureUnitCode" - )) + # check .data is data.frame and has required columns (including id_cols) + expected_cols <- c(id_cols, c("TADA.ResultMeasureValue", "TADA.ResultMeasure.MeasureUnitCode")) + TADA_CheckColumns(.data, expected_cols) # load TADA color palette @@ -244,9 +236,6 @@ TADA_Boxplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")) { #' Histogram_output[[30]] #' TADA_Histogram <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - # ensure comparable data identifier is in the id_cols vector if (is.null(id_cols)) { id_cols <- "TADA.ComparableDataIdentifier" @@ -255,14 +244,9 @@ TADA_Histogram <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")) warning("TADA.ComparableDataIdentifier not found in id_cols argument and is highly recommended: plotting without it may produce errors in the plot.") } - # check .data has required columns - TADA_CheckColumns(.data, id_cols) - - # check .data has required columns - TADA_CheckColumns(.data, c( - "TADA.ResultMeasureValue", - "TADA.ResultMeasure.MeasureUnitCode" - )) + # check .data is data.frame and has required columns (including id_cols) + expected_cols <- c(id_cols, c("TADA.ResultMeasureValue", "TADA.ResultMeasure.MeasureUnitCode")) + TADA_CheckColumns(.data, expected_cols) tada.pal <- TADA_ColorPalette(col_pair = TRUE) @@ -868,9 +852,6 @@ 
TADA_FieldValuesPie <- function(.data, field = "null", characteristicName = "nul #' Scatterplot_output[[35]] #' TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - # ensure comparable data identifier is in the id_cols vector if (is.null(id_cols)) { id_cols <- "TADA.ComparableDataIdentifier" @@ -880,15 +861,14 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier") warning("TADA.ComparableDataIdentifier not found in id_cols argument and is highly recommended: plotting without it may produce errors in the plot.") } - # check .data has required columns - TADA_CheckColumns(.data, id_cols) - - # check .data has required columns - TADA_CheckColumns(.data, c( + # check .data is data.frame and has required columns (including id_cols) + expected_cols <- c(id_cols, c( "ActivityStartDate", "TADA.ResultMeasureValue", "TADA.ResultMeasure.MeasureUnitCode" )) + TADA_CheckColumns(.data, expected_cols) + .data <- .data %>% dplyr::group_by(dplyr::across(dplyr::all_of(id_cols))) %>% @@ -1038,27 +1018,19 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier") #' ) #' TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableDataIdentifier", groups) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - - # check .data has required columns - TADA_CheckColumns(.data, id_cols) - - # check .data has required columns - reqcols <- c( - "TADA.ResultMeasureValue", - "TADA.ResultMeasure.MeasureUnitCode", - "ActivityStartDate" - ) - - # check .data has required columns - TADA_CheckColumns(.data, reqcols) - # if left blank, ensure comparable data identifier is in the id_cols vector if (is.null(id_cols)) { id_cols <- "TADA.ComparableDataIdentifier" } + # check .data is data.frame and has required columns (including id_cols) + expected_cols <- c(id_cols, c( + "ActivityStartDate", + "TADA.ResultMeasureValue", + "TADA.ResultMeasure.MeasureUnitCode" + )) + TADA_CheckColumns(.data, expected_cols) + if (!"TADA.ComparableDataIdentifier" %in% id_cols) { print("Note: TADA.ComparableDataIdentifier not found in id_cols argument and is highly recommended.") } @@ -1075,7 +1047,7 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD plot.data <- as.data.frame(.data) # this subset must include all fields included in plot hover below - plot.data <- subset(plot.data, plot.data[, id_cols] %in% groups)[, c(id_cols, reqcols, depthcols, "ActivityStartDateTime", "MonitoringLocationName", "TADA.ActivityMediaName", "ActivityMediaSubdivisionName", "ActivityRelativeDepthName", "TADA.CharacteristicName", "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText")] + plot.data <- subset(plot.data, plot.data[, id_cols] %in% groups)[, c(id_cols, expected_cols, depthcols, "ActivityStartDateTime", "MonitoringLocationName", "TADA.ActivityMediaName", "ActivityMediaSubdivisionName", "ActivityRelativeDepthName", "TADA.CharacteristicName", "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText")] plot.data$name <- gsub("_NA", "", plot.data[, id_cols]) plot.data$name <- gsub("_", " ", plot.data$name) @@ -1312,11 +1284,8 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD #' TADA_GroupedScatterplot(df, group_col = "MonitoringLocationName") #' TADA_GroupedScatterplot <- function(.data, group_col = "MonitoringLocationName", groups = NULL) { - # check .data is data.frame 
- TADA_CheckType(.data, "data.frame", "Input object") - - # check .data has required columns - reqcols <- c( + # check .data is data.frame and has required columns (including group_col) + required_cols <- c( "TADA.ComparableDataIdentifier", "TADA.ResultMeasureValue", "TADA.ResultMeasure.MeasureUnitCode", @@ -1324,14 +1293,10 @@ TADA_GroupedScatterplot <- function(.data, group_col = "MonitoringLocationName", "ActivityStartDateTime", "MonitoringLocationName" ) - - # add user-selected group_col to list of required columns - reqcols <- reqcols %>% + required_cols <- required_cols %>% append(group_col) %>% unique() - - # check .data has required columns - TADA_CheckColumns(.data, reqcols) + TADA_CheckColumns(.data, required_cols) # only allows for 1 column selection in id_cols @@ -1420,7 +1385,7 @@ TADA_GroupedScatterplot <- function(.data, group_col = "MonitoringLocationName", plot.data <- as.data.frame(.data) # this subset must include all fields included in plot hover below - plot.data <- subset(plot.data, plot.data[, group_col] %in% groups)[, unique(c(group_col, reqcols, depthcols, "TADA.ComparableDataIdentifier", "ActivityStartDateTime", "MonitoringLocationName", "TADA.ActivityMediaName", "ActivityMediaSubdivisionName", "ActivityRelativeDepthName", "TADA.CharacteristicName", "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText"))] + plot.data <- subset(plot.data, plot.data[, group_col] %in% groups)[, unique(c(group_col, required_cols, depthcols, "TADA.ComparableDataIdentifier", "ActivityStartDateTime", "MonitoringLocationName", "TADA.ActivityMediaName", "ActivityMediaSubdivisionName", "ActivityRelativeDepthName", "TADA.CharacteristicName", "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText"))] plot.data <- dplyr::arrange(plot.data, ActivityStartDate) diff --git a/R/GeospatialFunctions.R b/R/GeospatialFunctions.R index cdc23480e..fe9fb572a 100644 --- a/R/GeospatialFunctions.R +++ b/R/GeospatialFunctions.R @@ -2062,10 +2062,10 @@ TADA_GetATTAINSByAUID <- function(.data, au_ref = NULL, fill_ATTAINS_catch = FAL ) %>% dplyr::mutate( ATTAINS.WaterType = ifelse(is.na(ATTAINS.WaterType), - "NA", ATTAINS.WaterType + "NA", ATTAINS.WaterType ), Ref.WaterType = ifelse(is.na(Ref.WaterType), - "NA", Ref.WaterType + "NA", Ref.WaterType ), Mismatch = ATTAINS.WaterType != Ref.WaterType ) %>% @@ -2088,7 +2088,7 @@ TADA_GetATTAINSByAUID <- function(.data, au_ref = NULL, fill_ATTAINS_catch = FAL dplyr::pull() mismatch.text <- stringi::stri_replace_last(paste(mismatch.text, collapse = "; "), - fixed = "; ", " and " + fixed = "; ", " and " ) print(paste0( @@ -2186,7 +2186,6 @@ TADA_GetATTAINSByAUID <- function(.data, au_ref = NULL, fill_ATTAINS_catch = FAL #' } #' TADA_ViewATTAINS <- function(.data, ref_icons = TRUE) { - if (!any(c( "ATTAINS_catchments", "ATTAINS_points", "ATTAINS_lines", "ATTAINS_polygons" @@ -2253,11 +2252,11 @@ TADA_ViewATTAINS <- function(.data, ref_icons = TRUE) { missing_raw_features <- NULL try(missing_raw_features <- ATTAINS_catchments %>% - dplyr::filter(!assessmentunitidentifier %in% c( - ATTAINS_points$assessmentunitidentifier, - ATTAINS_lines$assessmentunitidentifier, - ATTAINS_polygons$assessmentunitidentifier - )), silent = TRUE) + dplyr::filter(!assessmentunitidentifier %in% c( + ATTAINS_points$assessmentunitidentifier, + ATTAINS_lines$assessmentunitidentifier, + ATTAINS_polygons$assessmentunitidentifier + )), silent = TRUE) if (!"without_ATTAINS_catchments" %in% names(.data)) { if (nrow(ATTAINS_table) == 0) { @@ -2288,7 +2287,7 @@ TADA_ViewATTAINS <- 
function(.data, ref_icons = TRUE) { suppressMessages(suppressWarnings({ # if data was spatial, remove for downstream leaflet dev: try(ATTAINS_table <- ATTAINS_table %>% - sf::st_drop_geometry(), silent = TRUE) + sf::st_drop_geometry(), silent = TRUE) tada.pal <- TADA_ColorPalette() @@ -2345,8 +2344,8 @@ TADA_ViewATTAINS <- function(.data, ref_icons = TRUE) { Organization_Count = length(unique(OrganizationIdentifier)), ATTAINS_AUs = as.character(list(unique(ATTAINS.AssessmentUnitIdentifier))), TADA.AURefSource = ifelse("TADA.AURefSource" %in% names(ATTAINS_table), - as.character(TADA.AURefSource), - "not provided" + as.character(TADA.AURefSource), + "not provided" ) ) %>% dplyr::mutate( @@ -2358,11 +2357,11 @@ TADA_ViewATTAINS <- function(.data, ref_icons = TRUE) { # Basemap for AOI: map <- leaflet::leaflet() %>% leaflet::addProviderTiles("Esri.WorldTopoMap", - group = "World topo", - options = leaflet::providerTileOptions( - updateWhenZooming = FALSE, - updateWhenIdle = TRUE - ) + group = "World topo", + options = leaflet::providerTileOptions( + updateWhenZooming = FALSE, + updateWhenIdle = TRUE + ) ) %>% leaflet::clearShapes() %>% leaflet::fitBounds( @@ -2542,7 +2541,7 @@ TADA_ViewATTAINS <- function(.data, ref_icons = TRUE) { without_ATTAINS_catchments <- NULL try(without_ATTAINS_catchments <- .data[["without_ATTAINS_catchments"]] %>% - dplyr::rename(nhd = 1), silent = TRUE) + dplyr::rename(nhd = 1), silent = TRUE) # Add missing catchment outlines (if they exist): try( @@ -2578,7 +2577,7 @@ TADA_ViewATTAINS <- function(.data, ref_icons = TRUE) { height = 14, orientation = "vertical", title = htmltools::tags$div("Legend", - style = "font-size: 14px; + style = "font-size: 14px; text-align: left; font-weight: bold;" ), position = "bottomright" @@ -2672,20 +2671,14 @@ TADA_FindNearbySites <- function(.data, dist_buffer = 100, nhd_res = "Hi", org_hierarchy = "none", meta_select = "random") { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - - # .data required columns - required_cols <- c( + # check .data is data.frame and has required columns + expected_cols <- c( "TADA.MonitoringLocationIdentifier", "TADA.LongitudeMeasure", "TADA.LatitudeMeasure" ) - - # check .data has required columns - TADA_CheckColumns(.data, required_cols) - - rm(required_cols) + TADA_CheckColumns(.data, expected_cols) + rm(expected_cols) # retain only necessary columns unique Monitoring Locations unique.mls <- .data %>% @@ -3147,11 +3140,8 @@ TADA_FindNearbySites <- function(.data, dist_buffer = 100, #' #' @export TADA_GetUniqueNearbySites <- function(.data) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - - # .data required columns - required_cols <- c( + # check .data is data.frame and has required columns + expected_cols <- c( "MonitoringLocationIdentifier", "TADA.MonitoringLocationIdentifier", "MonitoringLocationName", "TADA.MonitoringLocationName", "LongitudeMeasure", "TADA.LongitudeMeasure", @@ -3160,8 +3150,7 @@ TADA_GetUniqueNearbySites <- function(.data) { "MonitoringLocationDescriptionText", "TADA.NearbySites.Flag", "TADA.NearbySiteGroup" ) - # check .data has required columns - TADA_CheckColumns(.data, required_cols) + TADA_CheckColumns(.data, expected_cols) # filter only for locations with nearby sites .data <- .data %>% @@ -3170,7 +3159,7 @@ TADA_GetUniqueNearbySites <- function(.data) { TADA.NearbySites.Flag != "No nearby sites detected using input buffer distance." 
) %>% # retain only required columns - dplyr::select(dplyr::all_of(required_cols)) %>% + dplyr::select(dplyr::all_of(expected_cols)) %>% # retain only unique records dplyr::distinct() @@ -3385,7 +3374,6 @@ TADA_CreateAUMLCrosswalk <- function(.data, fill_USGS_catch = FALSE, return_nearest = TRUE, batch_upload = TRUE) { - # create list where all user matches dfs are set to NULL user.matches <- list( "TADA_with_ATTAINS" = NULL, @@ -3397,9 +3385,10 @@ TADA_CreateAUMLCrosswalk <- function(.data, # check to see if user supplied ref is NULL if (is.null(au_ref)) { - - print(paste0("TADA_CreateAUMLCrosswalk: no au_ref (user-supplied crosswalk ", - "was provided.")) + print(paste0( + "TADA_CreateAUMLCrosswalk: no au_ref (user-supplied crosswalk ", + "was provided." + )) } # check to see if user supplied ref is not NULL @@ -3416,9 +3405,10 @@ TADA_CreateAUMLCrosswalk <- function(.data, # check to see if user supplied ref is a data frame if (is.data.frame(au_ref)) { - - print(paste0("TADA_CreateAUMLCrosswalk: fetching ATTAINS geospatial data ", - "for assessment units in the user-supplied crosswalk.")) + print(paste0( + "TADA_CreateAUMLCrosswalk: fetching ATTAINS geospatial data ", + "for assessment units in the user-supplied crosswalk." + )) # list of partial string matches for columns in au_ref @@ -3456,12 +3446,11 @@ TADA_CreateAUMLCrosswalk <- function(.data, dplyr::filter(TADA.MonitoringLocationIdentifier %in% au_ref$ATTAINS.MonitoringLocationIdentifier) %>% dplyr::mutate(TADA.AURefSource = "User-supplied Ref") - if(dim(au.ref.mls)[1] > 0) { - - # get geospatial data for au_ref monitoring locations - user.matches <- spsUtil::quiet( - TADA_GetATTAINSByAUID(au.ref.mls, au_ref = au_ref, fill_ATTAINS_catch = fill_ATTAINS_catch) - ) + if (dim(au.ref.mls)[1] > 0) { + # get geospatial data for au_ref monitoring locations + user.matches <- spsUtil::quiet( + TADA_GetATTAINSByAUID(au.ref.mls, au_ref = au_ref, fill_ATTAINS_catch = fill_ATTAINS_catch) + ) } } } @@ -3534,8 +3523,10 @@ TADA_CreateAUMLCrosswalk <- function(.data, attains.cw.mls <- attains.cw.mls %>% dplyr::mutate(TADA.AURefSource = "ATTAINS Crosswalk") - print(paste0("TADA_CreateAUMLCrosswalk: fetching ATTAINS geospatial data ", - "for assessment units from the ATTAINS crosswalk.")) + print(paste0( + "TADA_CreateAUMLCrosswalk: fetching ATTAINS geospatial data ", + "for assessment units from the ATTAINS crosswalk." + )) # get geospatial data for attains cw monitoring locations attains.matches <- spsUtil::quiet( TADA_GetATTAINSByAUID(attains.cw.mls, au_ref = attains.cw, fill_ATTAINS_catch = fill_ATTAINS_catch) @@ -3547,8 +3538,10 @@ TADA_CreateAUMLCrosswalk <- function(.data, # TADA_CreateATTAINSAUMLCrosswalk section - print(paste0("TADA_CreateAUMLCrosswalk: checking to see if any unmatched ", - "monitoring locations remain in the original TADA data frame.")) + print(paste0( + "TADA_CreateAUMLCrosswalk: checking to see if any unmatched ", + "monitoring locations remain in the original TADA data frame." + )) get.attains.mls <- .data @@ -3568,8 +3561,10 @@ TADA_CreateAUMLCrosswalk <- function(.data, # add code here for if there are no remaining mls to match if (dim(get.attains.mls)[1] == 0) { - print(paste0("TADA_CreateAUMLCrosswalk: all monitorintg locations have ", - "already been matched to an assessment unit by the user or ATTAINS.")) + print(paste0( + "TADA_CreateAUMLCrosswalk: all monitorintg locations have ", + "already been matched to an assessment unit by the user or ATTAINS." 
+ )) get.attains.matches <- list( "TADA_with_ATTAINS" = NULL, diff --git a/R/MaintenanceScheduled.R b/R/MaintenanceScheduled.R index e183adb42..1837b07de 100644 --- a/R/MaintenanceScheduled.R +++ b/R/MaintenanceScheduled.R @@ -1,12 +1,12 @@ #' Update All TADA Reference Files (Internal) #' -#' This internal function updates all TADA reference files by calling a series of update functions. -#' It is used to ensure that all reference data is current and accurate across various +#' This internal function updates all TADA reference files by calling a series of update functions. +#' It is used to ensure that all reference data is current and accurate across various #' datasets and geospatial layers. #' -#' @details -#' The function sequentially calls several internal functions that update different sets -#' of reference data. Some updates may take longer than others, particularly the +#' @details +#' The function sequentially calls several internal functions that update different sets +#' of reference data. Some updates may take longer than others, particularly the #' `TADA_UpdateATTAINSParamUseOrgRef()` function. #' #' The specific reference files updated by this function include: @@ -27,7 +27,7 @@ #' \item Tribal geospatial layers #' } #' -#' @return +#' @return #' This function does not return any value. It performs updates as a side effect. #' #' @examples @@ -37,44 +37,56 @@ #' } .TADA_UpdateRefFiles <- function() { # Update All TADA Reference Files - + # ATTAINSRefTables.R - tryCatch({ - TADA_UpdateATTAINSOrgIDsRef() - TADA_UpdateATTAINSParamUseOrgRef() # takes a long time - TADA_UpdateATTAINSParameterWQPCharRef() - }, error = function(e) { - message("Error updating ATTAINS reference tables: ", e$message) - }) - + tryCatch( + { + TADA_UpdateATTAINSOrgIDsRef() + TADA_UpdateATTAINSParamUseOrgRef() # takes a long time + TADA_UpdateATTAINSParameterWQPCharRef() + }, + error = function(e) { + message("Error updating ATTAINS reference tables: ", e$message) + } + ) + # WQPWQXRefTables.R - tryCatch({ - TADA_UpdateWQXCharValRef() - TADA_UpdateMeasureUnitRef() - TADA_UpdateDetCondRef() - TADA_UpdateDetLimitRef() - TADA_UpdateActivityTypeRef() - TADA_UpdateCharacteristicRef() - TADA_UpdateMeasureQualifierCodeRef() - TADA_UpdateMonLocTypeRef() - TADA_UpdateWQPOrgProviderRef() - }, error = function(e) { - message("Error updating WQPWQX reference tables: ", e$message) - }) - + tryCatch( + { + TADA_UpdateWQXCharValRef() + TADA_UpdateMeasureUnitRef() + TADA_UpdateDetCondRef() + TADA_UpdateDetLimitRef() + TADA_UpdateActivityTypeRef() + TADA_UpdateCharacteristicRef() + TADA_UpdateMeasureQualifierCodeRef() + TADA_UpdateMonLocTypeRef() + TADA_UpdateWQPOrgProviderRef() + }, + error = function(e) { + message("Error updating WQPWQX reference tables: ", e$message) + } + ) + # CriteriaRefTables.R - tryCatch({ - TADA_UpdateEPACSTRef() - }, error = function(e) { - message("Error updating EPA CST reference: ", e$message) - }) - + tryCatch( + { + TADA_UpdateEPACSTRef() + }, + error = function(e) { + message("Error updating EPA CST reference: ", e$message) + } + ) + # TADAGeospatialRefLayers.R - tryCatch({ - TADA_UpdateTribalLayers() - }, error = function(e) { - message("Error updating tribal geospatial layers: ", e$message) - }) + tryCatch( + { + TADA_UpdateTribalLayers() + }, + error = function(e) { + message("Error updating tribal geospatial layers: ", e$message) + } + ) } #' Update Example Data for EPATADA Package (Internal) @@ -94,258 +106,262 @@ #' .TADA_UpdateExampleData() #' } .TADA_UpdateExampleData <- 
function() { - tryCatch({ - # ======================================= - # Generate Data_Nutrients_UT - # ======================================= - Data_Nutrients_UT <- TADA_DataRetrieval( - statecode = "UT", - characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), - startDate = "2020-10-01", - endDate = "2022-09-30", - ask = FALSE - ) - message("Data_Nutrients_UT") - message(dim(Data_Nutrients_UT)) - usethis::use_data(Data_Nutrients_UT, - internal = FALSE, overwrite = TRUE, - compress = "xz", version = 3, ascii = FALSE - ) - rm(Data_Nutrients_UT) - - # ======================================= - # Generate Data_6Tribes_5y - # ======================================= - Data_6Tribes_5y <- TADA_DataRetrieval( - organization = c( - "REDLAKE_WQX", "SFNOES_WQX", "PUEBLO_POJOAQUE", - "FONDULAC_WQX", "PUEBLOOFTESUQUE", "CNENVSER" - ), - startDate = "2018-01-01", - endDate = "2023-01-01", - ask = FALSE - ) - message("Data_6Tribes_5y") - message(dim(Data_6Tribes_5y)) - usethis::use_data(Data_6Tribes_5y, - internal = FALSE, overwrite = TRUE, - compress = "xz", version = 3, ascii = FALSE - ) - - # ======================================= - # Harmonize Data_6Tribes_5y - # ======================================= - harmonized_data <- subset(Data_6Tribes_5y, Data_6Tribes_5y$TADA.ActivityMediaName %in% c("WATER")) - harmonized_data <- TADA_RunKeyFlagFunctions(harmonized_data, clean = TRUE) - rm(Data_6Tribes_5y) - - harmonized_data <- harmonized_data %>% - TADA_FlagMethod(clean = TRUE) %>% - TADA_FlagAboveThreshold(clean = TRUE) %>% - TADA_FlagBelowThreshold(clean = TRUE) %>% - TADA_FindPotentialDuplicatesMultipleOrgs(dist_buffer = 100) %>% - TADA_FindPotentialDuplicatesSingleOrg() %>% - dplyr::filter(!(MeasureQualifierCode %in% c("D", "H", "ICA", "*"))) %>% - TADA_SimpleCensoredMethods( + tryCatch( + { + # ======================================= + # Generate Data_Nutrients_UT + # ======================================= + Data_Nutrients_UT <- TADA_DataRetrieval( + statecode = "UT", + characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), + startDate = "2020-10-01", + endDate = "2022-09-30", + ask = FALSE + ) + message("Data_Nutrients_UT") + message(dim(Data_Nutrients_UT)) + usethis::use_data(Data_Nutrients_UT, + internal = FALSE, overwrite = TRUE, + compress = "xz", version = 3, ascii = FALSE + ) + rm(Data_Nutrients_UT) + + # ======================================= + # Generate Data_6Tribes_5y + # ======================================= + Data_6Tribes_5y <- TADA_DataRetrieval( + organization = c( + "REDLAKE_WQX", "SFNOES_WQX", "PUEBLO_POJOAQUE", + "FONDULAC_WQX", "PUEBLOOFTESUQUE", "CNENVSER" + ), + startDate = "2018-01-01", + endDate = "2023-01-01", + ask = FALSE + ) + message("Data_6Tribes_5y") + message(dim(Data_6Tribes_5y)) + usethis::use_data(Data_6Tribes_5y, + internal = FALSE, overwrite = TRUE, + compress = "xz", version = 3, ascii = FALSE + ) + + # ======================================= + # Harmonize Data_6Tribes_5y + # ======================================= + harmonized_data <- subset(Data_6Tribes_5y, Data_6Tribes_5y$TADA.ActivityMediaName %in% c("WATER")) + harmonized_data <- TADA_RunKeyFlagFunctions(harmonized_data, clean = TRUE) + rm(Data_6Tribes_5y) + + harmonized_data <- harmonized_data %>% + TADA_FlagMethod(clean = TRUE) %>% + TADA_FlagAboveThreshold(clean = TRUE) %>% + TADA_FlagBelowThreshold(clean = TRUE) %>% + TADA_FindPotentialDuplicatesMultipleOrgs(dist_buffer = 100) %>% + TADA_FindPotentialDuplicatesSingleOrg() %>% + dplyr::filter(!(MeasureQualifierCode %in% c("D", "H", "ICA", 
"*"))) %>% + TADA_SimpleCensoredMethods( + nd_method = "multiplier", + nd_multiplier = 0.5, + od_method = "as-is", + od_multiplier = "null" + ) %>% + dplyr::filter(TADA.ResultMeasureValueDataTypes.Flag != "Text" & + TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Available" & + !is.na(TADA.ResultMeasureValue)) + + Data_6Tribes_5y_Harmonized <- TADA_HarmonizeSynonyms(harmonized_data) + message("Data_6Tribes_5y_Harmonized") + message(dim(Data_6Tribes_5y_Harmonized)) + usethis::use_data(Data_6Tribes_5y_Harmonized, + internal = FALSE, overwrite = TRUE, + compress = "xz", version = 3, ascii = FALSE + ) + rm(Data_6Tribes_5y_Harmonized, harmonized_data) + + # ======================================= + # Generate Data_R5_TADAPackageDemo + # ======================================= + Data_R5_TADAPackageDemo <- TADA_DataRetrieval( + startDate = "2019-05-01", + endDate = "2019-05-07", + statecode = c("IL", "IN", "MI", "MN", "OH", "WI"), + ask = FALSE + ) + message("Data_R5_TADAPackageDemo") + message(dim(Data_R5_TADAPackageDemo)) + usethis::use_data(Data_R5_TADAPackageDemo, + internal = FALSE, overwrite = TRUE, + compress = "xz", version = 3, ascii = FALSE + ) + rm(Data_R5_TADAPackageDemo) + + # ======================================= + # Module 3 Vignette Example Data + # ======================================= + Data_WV <- TADA_DataRetrieval( + startDate = "2020-03-14", + huc = "02070004", + applyautoclean = FALSE, + ask = FALSE + ) + # Filter for surface water data (optional) + Data_WV <- TADA_AnalysisDataFilter( + Data_WV, + clean = TRUE, + surface_water = TRUE, + ground_water = FALSE, + sediment = FALSE + ) + # Remove single organization duplicates (required) + Data_WV <- TADA_FindPotentialDuplicatesSingleOrg(Data_WV) + Data_WV <- dplyr::filter(Data_WV, TADA.SingleOrgDup.Flag == "Unique") + # Perform autocleaning (required) + Data_WV <- TADA_AutoClean(Data_WV) + # Handle censored results (required) + Data_WV <- TADA_SimpleCensoredMethods( + Data_WV, nd_method = "multiplier", nd_multiplier = 0.5, od_method = "as-is", od_multiplier = "null" + ) + # Remove multiple organization duplicates (optional) + Data_WV <- TADA_FindPotentialDuplicatesMultipleOrgs(Data_WV) + Data_WV <- dplyr::filter(Data_WV, TADA.ResultSelectedMultipleOrgs == "Y") + # Convert special characters + Data_WV <- TADA_ConvertSpecialChars(Data_WV, col = "TADA.ResultMeasureValue", clean = TRUE) + # Remove results with quality control issues (required) + Data_WV <- TADA_RunKeyFlagFunctions(Data_WV, clean = TRUE) + # Flag above and below threshold (do not remove) + Data_WV <- TADA_FlagAboveThreshold(Data_WV, clean = FALSE, flaggedonly = FALSE) + Data_WV <- TADA_FlagBelowThreshold(Data_WV, clean = FALSE, flaggedonly = FALSE) + # Harmonize synonyms + Data_WV <- TADA_HarmonizeSynonyms(Data_WV) + # Save example data + Data_HUC8_02070004_Mod1Output <- Data_WV + message("Data_HUC8_02070004_Mod1Output") + message(dim(Data_HUC8_02070004_Mod1Output)) + usethis::use_data(Data_HUC8_02070004_Mod1Output, + internal = FALSE, + overwrite = TRUE, + compress = "xz", + version = 3, + ascii = FALSE + ) + rm(Data_HUC8_02070004_Mod1Output, Data_WV) + + # ======================================= + # Generate Data_MT_MissoulaCounty + # ======================================= + Data_MT_MissoulaCounty <- TADA_DataRetrieval( + startDate = "2020-01-01", + endDate = "2022-12-31", + statecode = "MT", + characteristicName = c("Escherichia", "Escherichia coli", "pH"), + countycode = "Missoula County", + ask = FALSE ) %>% - 
dplyr::filter(TADA.ResultMeasureValueDataTypes.Flag != "Text" & - TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Available" & - !is.na(TADA.ResultMeasureValue)) - - Data_6Tribes_5y_Harmonized <- TADA_HarmonizeSynonyms(harmonized_data) - message("Data_6Tribes_5y_Harmonized") - message(dim(Data_6Tribes_5y_Harmonized)) - usethis::use_data(Data_6Tribes_5y_Harmonized, - internal = FALSE, overwrite = TRUE, - compress = "xz", version = 3, ascii = FALSE - ) - rm(Data_6Tribes_5y_Harmonized, harmonized_data) - - # ======================================= - # Generate Data_R5_TADAPackageDemo - # ======================================= - Data_R5_TADAPackageDemo <- TADA_DataRetrieval( - startDate = "2019-05-01", - endDate = "2019-05-07", - statecode = c("IL", "IN", "MI", "MN", "OH", "WI"), - ask = FALSE - ) - message("Data_R5_TADAPackageDemo") - message(dim(Data_R5_TADAPackageDemo)) - usethis::use_data(Data_R5_TADAPackageDemo, - internal = FALSE, overwrite = TRUE, - compress = "xz", version = 3, ascii = FALSE - ) - rm(Data_R5_TADAPackageDemo) - - # ======================================= - # Module 3 Vignette Example Data - # ======================================= - Data_WV <- TADA_DataRetrieval( - startDate = "2020-03-14", - huc = "02070004", - applyautoclean = FALSE, - ask = FALSE - ) - # Filter for surface water data (optional) - Data_WV <- TADA_AnalysisDataFilter( - Data_WV, - clean = TRUE, - surface_water = TRUE, - ground_water = FALSE, - sediment = FALSE - ) - # Remove single organization duplicates (required) - Data_WV <- TADA_FindPotentialDuplicatesSingleOrg(Data_WV) - Data_WV <- dplyr::filter(Data_WV, TADA.SingleOrgDup.Flag == "Unique") - # Perform autocleaning (required) - Data_WV <- TADA_AutoClean(Data_WV) - # Handle censored results (required) - Data_WV <- TADA_SimpleCensoredMethods( - Data_WV, - nd_method = "multiplier", - nd_multiplier = 0.5, - od_method = "as-is", - od_multiplier = "null" - ) - # Remove multiple organization duplicates (optional) - Data_WV <- TADA_FindPotentialDuplicatesMultipleOrgs(Data_WV) - Data_WV <- dplyr::filter(Data_WV, TADA.ResultSelectedMultipleOrgs == "Y") - # Convert special characters - Data_WV <- TADA_ConvertSpecialChars(Data_WV, col = "TADA.ResultMeasureValue", clean = TRUE) - # Remove results with quality control issues (required) - Data_WV <- TADA_RunKeyFlagFunctions(Data_WV, clean = TRUE) - # Flag above and below threshold (do not remove) - Data_WV <- TADA_FlagAboveThreshold(Data_WV, clean = FALSE, flaggedonly = FALSE) - Data_WV <- TADA_FlagBelowThreshold(Data_WV, clean = FALSE, flaggedonly = FALSE) - # Harmonize synonyms - Data_WV <- TADA_HarmonizeSynonyms(Data_WV) - # Save example data - Data_HUC8_02070004_Mod1Output <- Data_WV - message("Data_HUC8_02070004_Mod1Output") - message(dim(Data_HUC8_02070004_Mod1Output)) - usethis::use_data(Data_HUC8_02070004_Mod1Output, - internal = FALSE, - overwrite = TRUE, - compress = "xz", - version = 3, - ascii = FALSE - ) - rm(Data_HUC8_02070004_Mod1Output, Data_WV) - - # ======================================= - # Generate Data_MT_MissoulaCounty - # ======================================= - Data_MT_MissoulaCounty <- TADA_DataRetrieval( - startDate = "2020-01-01", - endDate = "2022-12-31", - statecode = "MT", - characteristicName = c("Escherichia", "Escherichia coli", "pH"), - countycode = "Missoula County", - ask = FALSE - ) %>% - TADA_RunKeyFlagFunctions() %>% - TADA_SimpleCensoredMethods() %>% - TADA_HarmonizeSynonyms() - - message("Data_MT_MissoulaCounty") - message(dim(Data_MT_MissoulaCounty)) - 
usethis::use_data(Data_MT_MissoulaCounty, - internal = FALSE, overwrite = TRUE, - compress = "xz", version = 3, ascii = FALSE - ) - - # ======================================= - # Generate Data_MT_AUMLRef - # ======================================= - # Retrieve and clean crosswalk from ATTAINS - attains.existing.MT <- TADA_GetATTAINSAUMLCrosswalk(org_id = "MTDEQ") - clean.existing.attains.MT <- TADA_UpdateATTAINSAUMLCrosswalk(org_id = "MTDEQ") - - # Create a user-supplied crosswalk for demonstration purposes - user_supplied_cw <- clean.existing.attains.MT %>% - dplyr::select( - ATTAINS.AssessmentUnitIdentifier, - ATTAINS.MonitoringLocationIdentifier, - ATTAINS.WaterType - ) %>% - dplyr::filter(ATTAINS.MonitoringLocationIdentifier %in% c( - "MDEQ_WQ_WQX-C04CKFKR05", "MDEQ_WQ_WQX-C04KNDYC01", "MDEQ_WQ_WQX-C04KNDYC02", - "MDEQ_WQ_WQX-C04KNDYC04", "MDEQ_WQ_WQX-C04KNDYC54" - )) %>% - dplyr::rename( - AssessmentUnitIdentifier = ATTAINS.AssessmentUnitIdentifier, - MonitoringLocationIdentifier = ATTAINS.MonitoringLocationIdentifier, - WaterType = ATTAINS.WaterType - ) %>% - # Add a new assessment unit for demonstration - dplyr::bind_rows(c( - AssessmentUnitIdentifier = "NEW:EX_MDEQ_WQ_WQX", - MonitoringLocationIdentifier = "NARS_WQX-NWC_MT-10184", - WaterType = "LAKE, FRESHWATER" - )) - - MT_AUMLRef <- TADA_CreateAUMLCrosswalk( - Data_MT_MissoulaCounty, - au_ref = user_supplied_cw, - org_id = "MTDEQ", - fill_ATTAINS_catch = TRUE, - fill_USGS_catch = TRUE, - return_nearest = TRUE, - batch_upload = TRUE - ) - - Data_MT_AUMLRef <- MT_AUMLRef - - rm(MT_AUMLRef) - - message("Data_MT_AUMLRef") - message(dim(Data_MT_AUMLRef)) - usethis::use_data(Data_MT_AUMLRef, - internal = FALSE, - overwrite = TRUE, - compress = "xz", - version = 3, - ascii = FALSE) - - # ======================================= - # Generate Data_MT_UseAURef - # ======================================= - Data_MT_UseAURef <- TADA_CreateUseAURef(AUMLRef = Data_MT_AUMLRef, org_id = "MTDEQ") - - message("Data_MT_UseAURef") - message(dim(Data_MT_UseAURef)) - usethis::use_data(Data_MT_UseAURef, - internal = FALSE, overwrite = TRUE, - compress = "xz", version = 3, ascii = FALSE - ) - rm(attains.existing.MT, clean.existing.attains.MT, user_supplied_cw, - MT_AUMLRef) - - # ======================================= - # Generate Data_MT.UseAURef_Water - # ======================================= - Data_MT_UseAURef_Water <- TADA_CreateUseAURef( - waterUseRef = TADA_CreateWaterUseRef(org_id = "MTDEQ"), - AUMLRef = Data_MT_AUMLRef, - org_id = "MTDEQ" - ) - - message("Data_MT_UseAURef_Water") - message(dim(Data_MT_UseAURef_Water)) - - usethis::use_data(Data_MT_UseAURef_Water, - internal = FALSE, overwrite = TRUE, - compress = "xz", version = 3, ascii = FALSE - ) - rm(Data_MT_UseAURef_Water) - - }, error = function(e) { - message("An error occurred during data update: ", e$message) - }) -} + TADA_RunKeyFlagFunctions() %>% + TADA_SimpleCensoredMethods() %>% + TADA_HarmonizeSynonyms() + + message("Data_MT_MissoulaCounty") + message(dim(Data_MT_MissoulaCounty)) + usethis::use_data(Data_MT_MissoulaCounty, + internal = FALSE, overwrite = TRUE, + compress = "xz", version = 3, ascii = FALSE + ) + + # ======================================= + # Generate Data_MT_AUMLRef + # ======================================= + # Retrieve and clean crosswalk from ATTAINS + attains.existing.MT <- TADA_GetATTAINSAUMLCrosswalk(org_id = "MTDEQ") + clean.existing.attains.MT <- TADA_UpdateATTAINSAUMLCrosswalk(org_id = "MTDEQ") + + # Create a user-supplied crosswalk for demonstration 
purposes + user_supplied_cw <- clean.existing.attains.MT %>% + dplyr::select( + ATTAINS.AssessmentUnitIdentifier, + ATTAINS.MonitoringLocationIdentifier, + ATTAINS.WaterType + ) %>% + dplyr::filter(ATTAINS.MonitoringLocationIdentifier %in% c( + "MDEQ_WQ_WQX-C04CKFKR05", "MDEQ_WQ_WQX-C04KNDYC01", "MDEQ_WQ_WQX-C04KNDYC02", + "MDEQ_WQ_WQX-C04KNDYC04", "MDEQ_WQ_WQX-C04KNDYC54" + )) %>% + dplyr::rename( + AssessmentUnitIdentifier = ATTAINS.AssessmentUnitIdentifier, + MonitoringLocationIdentifier = ATTAINS.MonitoringLocationIdentifier, + WaterType = ATTAINS.WaterType + ) %>% + # Add a new assessment unit for demonstration + dplyr::bind_rows(c( + AssessmentUnitIdentifier = "NEW:EX_MDEQ_WQ_WQX", + MonitoringLocationIdentifier = "NARS_WQX-NWC_MT-10184", + WaterType = "LAKE, FRESHWATER" + )) + + MT_AUMLRef <- TADA_CreateAUMLCrosswalk( + Data_MT_MissoulaCounty, + au_ref = user_supplied_cw, + org_id = "MTDEQ", + fill_ATTAINS_catch = TRUE, + fill_USGS_catch = TRUE, + return_nearest = TRUE, + batch_upload = TRUE + ) + + Data_MT_AUMLRef <- MT_AUMLRef + + rm(MT_AUMLRef) + message("Data_MT_AUMLRef") + message(dim(Data_MT_AUMLRef)) + usethis::use_data(Data_MT_AUMLRef, + internal = FALSE, + overwrite = TRUE, + compress = "xz", + version = 3, + ascii = FALSE + ) + + # ======================================= + # Generate Data_MT_UseAURef + # ======================================= + Data_MT_UseAURef <- TADA_CreateUseAURef(AUMLRef = Data_MT_AUMLRef, org_id = "MTDEQ") + + message("Data_MT_UseAURef") + message(dim(Data_MT_UseAURef)) + usethis::use_data(Data_MT_UseAURef, + internal = FALSE, overwrite = TRUE, + compress = "xz", version = 3, ascii = FALSE + ) + rm( + attains.existing.MT, clean.existing.attains.MT, user_supplied_cw, + MT_AUMLRef + ) + + # ======================================= + # Generate Data_MT.UseAURef_Water + # ======================================= + Data_MT_UseAURef_Water <- TADA_CreateUseAURef( + waterUseRef = TADA_CreateWaterUseRef(org_id = "MTDEQ"), + AUMLRef = Data_MT_AUMLRef, + org_id = "MTDEQ" + ) + + message("Data_MT_UseAURef_Water") + message(dim(Data_MT_UseAURef_Water)) + + usethis::use_data(Data_MT_UseAURef_Water, + internal = FALSE, overwrite = TRUE, + compress = "xz", version = 3, ascii = FALSE + ) + rm(Data_MT_UseAURef_Water) + }, + error = function(e) { + message("An error occurred during data update: ", e$message) + } + ) +} diff --git a/R/ResultFlagsDependent.R b/R/ResultFlagsDependent.R index 52e1b70eb..78d05ead4 100644 --- a/R/ResultFlagsDependent.R +++ b/R/ResultFlagsDependent.R @@ -46,14 +46,12 @@ #' ) #' TADA_FlagFraction <- function(.data, clean = TRUE, flaggedonly = FALSE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") + # check .data is data.frame and has required columns + TADA_CheckColumns(.data, c("TADA.CharacteristicName", "TADA.ResultSampleFractionText")) # check clean is boolean TADA_CheckType(clean, "logical") # check flaggedonly is boolean TADA_CheckType(flaggedonly, "logical") - # check .data has required columns - TADA_CheckColumns(.data, c("TADA.CharacteristicName", "TADA.ResultSampleFractionText")) # check that both clean and flaggedonly are not TRUE if (clean == TRUE & flaggedonly == TRUE) { stop("Function not executed because clean and flaggedonly cannot both be TRUE") @@ -204,14 +202,12 @@ TADA_FlagFraction <- function(.data, clean = TRUE, flaggedonly = FALSE) { #' ) #' TADA_FlagSpeciation <- function(.data, clean = c("suspect_only", "nonstandardized_only", "both", "none"), flaggedonly = FALSE) { - # check .data is 
data.frame - TADA_CheckType(.data, "data.frame", "Input object") + # check .data is data.frame and has required columns + TADA_CheckColumns(.data, c("TADA.CharacteristicName", "TADA.MethodSpeciationName")) # check clean is boolean TADA_CheckType(clean, "character") # check flaggedonly is boolean TADA_CheckType(flaggedonly, "logical") - # check .data has required columns - TADA_CheckColumns(.data, c("TADA.CharacteristicName", "TADA.MethodSpeciationName")) # check that clean is either "suspect_only", "nonstandardized_only", "both", or "none" clean <- match.arg(clean) @@ -381,8 +377,9 @@ TADA_FlagSpeciation <- function(.data, clean = c("suspect_only", "nonstandardize #' ) #' TADA_FlagResultUnit <- function(.data, clean = c("suspect_only", "nonstandardized_only", "both", "none"), flaggedonly = FALSE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") + # check .data is data.frame and has required columns + expected_cols <- c("TADA.CharacteristicName", "TADA.ResultMeasure.MeasureUnitCode", "TADA.ActivityMediaName") + TADA_CheckColumns(.data, expected_cols) # check clean is character TADA_CheckType(clean, "character") # check flaggedonly is boolean @@ -410,7 +407,6 @@ TADA_FlagResultUnit <- function(.data, clean = c("suspect_only", "nonstandardize .data$TADA.ActivityMediaName <- toupper(.data$ActivityMediaName) } - TADA_CheckColumns(.data, c("TADA.CharacteristicName", "TADA.ResultMeasure.MeasureUnitCode", "TADA.ActivityMediaName")) # check that clean is either "suspect_only", "nonstandardized_only", "both", or "none" clean <- match.arg(clean) @@ -560,14 +556,12 @@ TADA_FlagResultUnit <- function(.data, clean = c("suspect_only", "nonstandardize #' QC_clean <- TADA_FindQCActivities(Data_Nutrients_UT, clean = TRUE) #' TADA_FindQCActivities <- function(.data, clean = FALSE, flaggedonly = FALSE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") + # check .data is data.frame and has required columns + TADA_CheckColumns(.data, c("ActivityTypeCode")) # check that clean is boolean TADA_CheckType(clean, "logical") # check flaggedonly is boolean TADA_CheckType(flaggedonly, "logical") - # check .data has required columns - TADA_CheckColumns(.data, c("ActivityTypeCode")) # execute function after checks are passed # delete existing flag column @@ -695,10 +689,8 @@ TADA_FindQCActivities <- function(.data, clean = FALSE, flaggedonly = FALSE) { #' # Find pairs for all data flagged as "QC_replicate" within a 5-minute time window: #' df_all_pairs_5min <- TADA_PairReplicates(df, time_difference = 300) TADA_PairReplicates <- function(.data, type = c("QC_replicate"), time_difference = 600) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - # check .data has required columns - TADA_CheckColumns(.data, c( + # check .data is data.frame and has required columns + expected_cols <- c( "OrganizationIdentifier", "ActivityTypeCode", "ActivityStartDate", "ActivityStartDateTime", "ResultIdentifier", "ActivityRelativeDepthName", @@ -708,7 +700,8 @@ TADA_PairReplicates <- function(.data, type = c("QC_replicate"), time_difference "TADA.ResultDepthHeightMeasure.MeasureValue", "TADA.ActivityTopDepthHeightMeasure.MeasureValue", "TADA.ActivityBottomDepthHeightMeasure.MeasureValue" - )) + ) + TADA_CheckColumns(.data, expected_cols) # run TADA_FindQCActivities if needed if (("TADA.ActivityType.Flag" %in% colnames(.data)) == TRUE) { @@ -847,14 +840,12 @@ TADA_PairReplicates <- function(.data, type = c("QC_replicate"), 
time_difference #' clean = TRUE, define = FALSE #' ) TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FALSE, define = TRUE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") + # check .data is data.frame and has required columns + TADA_CheckColumns(.data, "MeasureQualifierCode") # check that clean is boolean TADA_CheckType(clean, "logical") # check flaggedonly is boolean TADA_CheckType(flaggedonly, "logical") - # check .data has required columns - TADA_CheckColumns(.data, "MeasureQualifierCode") # check .data MeasureQualifierCode is not all NA. If it is, don't run function and return .data if (all(is.na(.data$MeasureQualifierCode))) { print("TADA_FlagMeasureQualifierCode: Dataframe does not include any information (all NA's) in MeasureQualifierCode.") diff --git a/R/ResultFlagsIndependent.R b/R/ResultFlagsIndependent.R index 1d93b5d77..bdf2f9ce9 100644 --- a/R/ResultFlagsIndependent.R +++ b/R/ResultFlagsIndependent.R @@ -54,18 +54,16 @@ #' ) #' TADA_FlagMethod <- function(.data, clean = FALSE, flaggedonly = FALSE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") + # check .data is data.frame and has required columns + expected_cols <- c( + "TADA.CharacteristicName", "ResultAnalyticalMethod.MethodIdentifier", + "ResultAnalyticalMethod.MethodIdentifierContext" + ) + TADA_CheckColumns(.data, expected_cols) # check clean is boolean TADA_CheckType(clean, "logical") # check flaggedonly is boolean TADA_CheckType(flaggedonly, "logical") - # check .data has required columns - required_cols <- c( - "TADA.CharacteristicName", "ResultAnalyticalMethod.MethodIdentifier", - "ResultAnalyticalMethod.MethodIdentifierContext" - ) - TADA_CheckColumns(.data, required_cols) # check that clean and flaggedonly are not both TRUE if (clean == TRUE & flaggedonly == TRUE) { stop("Function not executed because clean and flaggedonly cannot both be TRUE") @@ -218,14 +216,8 @@ TADA_FlagMethod <- function(.data, clean = FALSE, flaggedonly = FALSE) { #' } #' TADA_FlagContinuousData <- function(.data, clean = FALSE, flaggedonly = FALSE, time_difference = 4) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - # check clean is boolean - TADA_CheckType(clean, "logical") - # check flaggedonly is boolean - TADA_CheckType(flaggedonly, "logical") - # check .data has required columns - TADA_CheckColumns(.data, c( + # check .data is data.frame and has required columns + expected_cols <- c( "ActivityTypeCode", "SampleCollectionEquipmentName", "ResultDetectionConditionText", @@ -235,8 +227,12 @@ TADA_FlagContinuousData <- function(.data, clean = FALSE, flaggedonly = FALSE, t "ResultIdentifier", "OrganizationIdentifier", "ActivityRelativeDepthName" - )) - + ) + TADA_CheckColumns(.data, expected_cols) + # check clean is boolean + TADA_CheckType(clean, "logical") + # check flaggedonly is boolean + TADA_CheckType(flaggedonly, "logical") # check that clean and flaggedonly are not both TRUE if (clean == TRUE & flaggedonly == TRUE) { stop("Function not executed because clean and flaggedonly cannot both be TRUE") @@ -479,18 +475,17 @@ TADA_FlagContinuousData <- function(.data, clean = FALSE, flaggedonly = FALSE, t #' ) #' TADA_FlagAboveThreshold <- function(.data, clean = FALSE, flaggedonly = FALSE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") + # check .data is data.frame and has required columns + expected_cols <- c( + "TADA.CharacteristicName", 
"TADA.ActivityMediaName", "TADA.ResultMeasureValue", + "TADA.ResultMeasure.MeasureUnitCode" + ) + TADA_CheckColumns(.data, expected_cols) # check clean is boolean TADA_CheckType(clean, "logical") # check flaggedonly is boolean TADA_CheckType(flaggedonly, "logical") # check .data has required columns - required_cols <- c( - "TADA.CharacteristicName", "TADA.ActivityMediaName", "TADA.ResultMeasureValue", - "TADA.ResultMeasure.MeasureUnitCode" - ) - TADA_CheckColumns(.data, required_cols) # check that clean and flaggedonly are not both TRUE if (clean == TRUE & flaggedonly == TRUE) { stop("Function not executed because clean and flaggedonly cannot both be TRUE") @@ -687,18 +682,16 @@ TADA_FlagAboveThreshold <- function(.data, clean = FALSE, flaggedonly = FALSE) { #' ) #' TADA_FlagBelowThreshold <- function(.data, clean = FALSE, flaggedonly = FALSE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") + # check .data is data.frame and has required columns + expected_cols <- c( + "TADA.CharacteristicName", "TADA.ActivityMediaName", "TADA.ResultMeasureValue", + "TADA.ResultMeasure.MeasureUnitCode" + ) + TADA_CheckColumns(.data, expected_cols) # check clean is boolean TADA_CheckType(clean, "logical") # check flaggedonly is boolean TADA_CheckType(flaggedonly, "logical") - # check .data has required columns - required_cols <- c( - "TADA.CharacteristicName", "TADA.ActivityMediaName", "TADA.ResultMeasureValue", - "TADA.ResultMeasure.MeasureUnitCode" - ) - TADA_CheckColumns(.data, required_cols) # check that clean and flaggedonly are not both TRUE if (clean == TRUE & flaggedonly == TRUE) { stop("Function not executed because clean and flaggedonly cannot both be TRUE") @@ -904,16 +897,14 @@ TADA_FlagBelowThreshold <- function(.data, clean = FALSE, flaggedonly = FALSE) { #' # Note: When clean = TRUE, cleanNA = TRUE, and flaggedonly = TRUE, an error message is returned #' TADA_FindQAPPApproval <- function(.data, clean = FALSE, cleanNA = FALSE, flaggedonly = FALSE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") + # check .data is data.frame and has required columns + TADA_CheckColumns(.data, "QAPPApprovedIndicator") # check clean is boolean TADA_CheckType(clean, "logical") # check cleanNA is boolean TADA_CheckType(cleanNA, "logical") # check flaggedonly is boolean TADA_CheckType(flaggedonly, "logical") - # check .data has required columns - TADA_CheckColumns(.data, "QAPPApprovedIndicator") # check that clean, cleanNA and flaggedonly are not all TRUE if (clean == TRUE & cleanNA == TRUE & flaggedonly == TRUE) { stop("Function not executed because clean, cleanNA, and flaggedonly cannot all be TRUE") @@ -1010,10 +1001,6 @@ TADA_FindQAPPApproval <- function(.data, clean = FALSE, cleanNA = FALSE, flagged TADA_FindQAPPDoc <- function(.data, clean = FALSE) { # check .data is data.frame TADA_CheckType(.data, "data.frame", "Input object") - # check clean is boolean - TADA_CheckType(clean, "logical") - # check .data has required columns - # generate required column if it does not exist (there is no project data) if ("ProjectFileUrl" %in% colnames(.data)) { .data <- .data @@ -1021,10 +1008,11 @@ TADA_FindQAPPDoc <- function(.data, clean = FALSE) { # create empty ProjectFileUrl column .data[, "ProjectFileUrl"] <- NA } - # check .data has required columns TADA_CheckColumns(.data, "ProjectFileUrl") + # check clean is boolean + TADA_CheckType(clean, "logical") # default flag column .data$TADA.QAPPDocAvailable <- "N" @@ -1164,14 +1152,12 @@ 
TADA_FlagCoordinates <- function(.data, clean_outsideUSA = c("no", "remove", "change sign"), clean_imprecise = FALSE, flaggedonly = FALSE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") + # check .data is data.frame and has required columns + TADA_CheckColumns(.data, c("TADA.LatitudeMeasure", "TADA.LongitudeMeasure")) # check clean_outsideUSA is character TADA_CheckType(clean_outsideUSA, "character") # check clean_imprecise is boolean TADA_CheckType(clean_imprecise, "logical") - # check .data has required columns - TADA_CheckColumns(.data, c("TADA.LatitudeMeasure", "TADA.LongitudeMeasure")) # check lat and long are "numeric" if (!is.numeric(.data$TADA.LongitudeMeasure)) { warning("TADA.LongitudeMeasure field must be numeric") diff --git a/R/TADARefTables.R b/R/TADARefTables.R index c059c9104..02194e580 100644 --- a/R/TADARefTables.R +++ b/R/TADARefTables.R @@ -60,10 +60,7 @@ TADA_GetSynonymRef <- function(.data) { return(ref) } - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - - # check .data has the required columns + # check .data is data.frame and has required columns expected_cols <- c( "TADA.CharacteristicName", "TADA.ResultSampleFractionText", diff --git a/R/Transformations.R b/R/Transformations.R index 6e8bc770c..68cb43504 100644 --- a/R/Transformations.R +++ b/R/Transformations.R @@ -63,78 +63,67 @@ #' } #' TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - - # Check if the input data frame is empty - if (nrow(.data) == 0) { - message("The entered data frame is empty. The function will not run.") - return(NULL) # Exit the function early - } - - # check .data has the required columns + # check .data is data.frame and has required columns expected_cols <- c( "TADA.CharacteristicName", "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName" ) TADA_CheckColumns(.data, expected_cols) - - # define which columns are expected in ref - expected_ref_cols <- c( - "TADA.CharacteristicName", - "Target.TADA.CharacteristicName", - "TADA.CharacteristicNameAssumptions", - "TADA.ResultSampleFractionText", - "Target.TADA.ResultSampleFractionText", - "TADA.FractionAssumptions", - "TADA.MethodSpeciationName", - "Target.TADA.MethodSpeciationName", - "TADA.SpeciationAssumptions", - "Target.TADA.SpeciationConversionFactor", - "HarmonizationGroup" - ) - + + # Check if the input data frame is empty + if (nrow(.data) == 0) { + message("The entered data frame is empty. The function will not run.") + return(NULL) # Exit the function early + } + # if class(ResultMeasureValue) != numeric, run special char function - EDH - should not be needed at this point but doesn't hurt. if (!is.numeric(.data$TADA.ResultMeasureValue)) { stop("TADA.ResultMeasureValue is not numeric. This column must be numeric before proceeding.") } - + # Changes NONE in fraction and speciation to NA for better harmonization # Should this be specified in the template instead? 
7/25/25 cm .data <- .data %>% dplyr::mutate( TADA.ResultSampleFractionText = replace(TADA.ResultSampleFractionText, TADA.ResultSampleFractionText %in% c("NONE"), NA), TADA.MethodSpeciationName = replace(TADA.MethodSpeciationName, TADA.MethodSpeciationName %in% c("NONE"), NA) ) - + # define harm.ref # if input for ref exists, use that data if (!missing(ref)) { - # check ref is data.frame - TADA_CheckType(ref, "data.frame") - # check ref has all of the required columns + expected_ref_cols <- c(expected_cols, c( + "Target.TADA.CharacteristicName", + "Target.TADA.ResultSampleFractionText", + "Target.TADA.MethodSpeciationName", + "TADA.CharacteristicNameAssumptions", + "TADA.FractionAssumptions", + "TADA.SpeciationAssumptions", + "Target.TADA.SpeciationConversionFactor", + "HarmonizationGroup" + )) TADA_CheckColumns(ref, expected_ref_cols) - + harm.ref <- ref } - + # if input for ref does not exist, use raw harmonization template if (missing(ref)) { # use output of TADA_GetSynonymRef which uses the TADA HarmonizationTemplate.csv in the extdata folder harm.ref <- TADA_GetSynonymRef(.data) %>% dplyr::distinct() } - + # find places where metadata will be changed and add targets harm.ref$TADA.Harmonized.Flag <- ifelse(!is.na(harm.ref$Target.TADA.CharacteristicName) | - !is.na(harm.ref$Target.TADA.ResultSampleFractionText) | - !is.na(harm.ref$Target.TADA.MethodSpeciationName), - TRUE, FALSE + !is.na(harm.ref$Target.TADA.ResultSampleFractionText) | + !is.na(harm.ref$Target.TADA.MethodSpeciationName), + TRUE, FALSE ) - + .data <- .data[, !names(.data) %in% c("TADA.ComparableDataIdentifier")] - + # join harm.ref to .data flag.data <- .data %>% dplyr::left_join(harm.ref, by = c( @@ -142,7 +131,7 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) { "TADA.ResultSampleFractionText", "TADA.MethodSpeciationName" )) - + # TADA.CharacteristicName # replace TADA.CharacteristicName with Target.TADA.CharacteristicName clean.data <- flag.data %>% @@ -152,7 +141,7 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) { # is.na(Target.TADA.CharacteristicName) ~ TADA.CharacteristicName, .default = TADA.CharacteristicName )) - + # TADA.ResultSampleFractionText # replace ResultSampleFractionText with Target.TADA.ResultSampleFractionText clean.data <- clean.data %>% @@ -163,13 +152,13 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) { # is.na(Target.TADA.ResultSampleFractionText) ~ TADA.ResultSampleFractionText .default = TADA.ResultSampleFractionText )) - + # Handle instances with DO where the speciation is listed "AS O2" but it should be NA clean.data$TADA.MethodSpeciationName <- ifelse(!is.na(clean.data$TADA.MethodSpeciationName) & is.na(clean.data$Target.TADA.MethodSpeciationName) & !is.na(clean.data$TADA.SpeciationAssumptions), clean.data$Target.TADA.MethodSpeciationName, clean.data$TADA.MethodSpeciationName) - + # TADA.MethodSpeciationName # replace MethodSpeciationName with Target.TADA.MethodSpeciationName - + if (np_speciation == TRUE) { clean.data <- clean.data %>% # use TADA suggested spec where there is a suggested spec, use original spec if no suggested spec @@ -195,7 +184,7 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) { .default = TADA.MethodSpeciationName )) } - + # remove conversion columns clean.data <- clean.data %>% dplyr::select(-c( @@ -205,9 +194,9 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) { "Target.TADA.SpeciationConversionFactor", "HarmonizationGroup" )) - + 
clean.data$TADA.Harmonized.Flag <- ifelse(is.na(clean.data$TADA.Harmonized.Flag), FALSE, clean.data$TADA.Harmonized.Flag) - + # return clean.data clean.data <- TADA_CreateComparableID(clean.data) clean.data <- TADA_OrderCols(clean.data) @@ -321,19 +310,7 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) { TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "mean")) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - - # Check if the input data frame is empty - if (nrow(.data) == 0) { - message("The entered data frame is empty. The function will not run.") - return(NULL) # Exit the function early - } - - # check to make sure daily_agg is populated with allowable value - daily_agg <- match.arg(daily_agg) - - # check required columns for TADA dataset + # check .data is data.frame and has required columns req_cols <- c( "TADA.CharacteristicName", "TADA.ResultSampleFractionText", @@ -360,33 +337,40 @@ TADA_CalculateTotalNP <- function(.data, "TADA.ComparableDataIdentifier", "TADA.ResultMeasureValueDataTypes.Flag" ) - TADA_CheckColumns(.data, expected_cols = req_cols) - - + TADA_CheckColumns(.data, req_cols) + # Check if the input data frame is empty + if (nrow(.data) == 0) { + message("The entered data frame is empty. The function will not run.") + return(NULL) # Exit the function early + } + + # check to make sure daily_agg is populated with allowable value + daily_agg <- match.arg(daily_agg) + # check if QC flag function ran and message warning if not if (!"TADA.ActivityType.Flag" %in% names(.data)) { message("TADA_CalculateTotalNP: Your input dataset was missing the TADA.ActivityType.Flag column, suggesting that QC replicates have not been addressed or reviewed. Running the TADA_FindQCActivities function with the clean = FALSE option before executing this function. This function will not include QC results when aggregating to a daily maximum and total nutrient value.") .data <- TADA_FindQCActivities(.data, clean = FALSE) } - + # check if unit flag function ran and message warning if not if (!"TADA.ResultUnit.Flag" %in% names(.data)) { message("TADA_CalculateTotalNP: Your input dataset was missing the TADA.ResultUnit.Flag column, suggesting that unit and characteristic combinations have not been addressed or reviewed. Running the TADA_FlagResultUnit function with the clean = FALSE option before executing this function. This function will not include results with invalid or suspect units when aggregating to a daily maximum and total nutrient value.") .data <- TADA_FlagResultUnit(.data, clean = "none") } - + # check if fraction flag function ran and message if not if (!"TADA.SampleFraction.Flag" %in% names(.data)) { message("TADA_CalculateTotalNP: Your input dataset was missing the TADA.SampleFraction.Flag column, suggesting that fraction and characteristic combinations have not been addressed or reviewed. Running the TADA_FlagFraction function with the clean = FALSE option before executing this function. This function will not include results with invalid or suspect fractions when aggregating to a daily maximum and total nutrient value.") .data <- TADA_FlagFraction(.data, clean = FALSE) } - + # check if speciation flag function ran and message warning if not if (!"TADA.MethodSpeciation.Flag" %in% names(.data)) { message("TADA_CalculateTotalNP: Your input dataset was missing the TADA.MethodSpeciation.Flag column, suggesting that speciation and characteristic combinations have not been addressed or reviewed. 
Running the TADA_FlagSpeciation function with the clean = FALSE option before executing this function. This function will not include results with invalid or suspect speciations when aggregating to a daily maximum and total nutrient value.") .data <- TADA_FlagSpeciation(.data, clean = "none") } - + # Check if the specified values are present in the TADA.ResultMeasureValueDataTypes.Flag column if (any(.data$TADA.ResultMeasureValueDataTypes.Flag %in% c( "TP estimated from one or more subspecies.", @@ -396,70 +380,70 @@ TADA_CalculateTotalNP <- function(.data, message(paste("TADA_CalculateTotalNP has already been run. Returning data unchanged. See TADA.ResultMeasureValueDataTypes.Flag column.")) return(.data) } - + # Create the include and exclude data frames include_df <- .data[.data$TADA.ActivityType.Flag == "Non_QC" & - (.data$TADA.ResultMeasureValueDataTypes.Flag %in% - c( - "Numeric", - "Result Value/Unit Estimated from Detection Limit", - "Less Than", - "Percentage", - "Approximate Value", - "Greater Than", - "Comma-Separated Numeric", - "Numeric Range - Averaged", - "Percentage Range - Averaged", - "Approximate Value", - "Result Value/Unit Copied from Detection Limit" - )) & - (.data$TADA.ResultUnit.Flag %in% - c( - "Pass", - "Not Reviewed" - )) & - (.data$TADA.SampleFraction.Flag %in% - c( - "Pass", - "Not Reviewed" - )) & - (.data$TADA.MethodSpeciation.Flag %in% - c( - "Pass", - "Not Reviewed" - )), ] - + (.data$TADA.ResultMeasureValueDataTypes.Flag %in% + c( + "Numeric", + "Result Value/Unit Estimated from Detection Limit", + "Less Than", + "Percentage", + "Approximate Value", + "Greater Than", + "Comma-Separated Numeric", + "Numeric Range - Averaged", + "Percentage Range - Averaged", + "Approximate Value", + "Result Value/Unit Copied from Detection Limit" + )) & + (.data$TADA.ResultUnit.Flag %in% + c( + "Pass", + "Not Reviewed" + )) & + (.data$TADA.SampleFraction.Flag %in% + c( + "Pass", + "Not Reviewed" + )) & + (.data$TADA.MethodSpeciation.Flag %in% + c( + "Pass", + "Not Reviewed" + )), ] + exclude_df <- .data[.data$TADA.ActivityType.Flag != "Non_QC" | - is.na(.data$TADA.ResultMeasureValueDataTypes.Flag) | - (.data$TADA.ResultMeasureValueDataTypes.Flag %in% - c( - "NA - Not Available", - "Text", - "Non-ASCII Character(s)", - "Result Value/Unit Cannot Be Estimated From Detection Limit", - "Coerced to NA" - )) | - !(.data$TADA.ResultUnit.Flag %in% - c( - "Pass", - "Not Reviewed" - )) | - !(.data$TADA.SampleFraction.Flag %in% - c( - "Pass", - "Not Reviewed" - )) | - !(.data$TADA.MethodSpeciation.Flag %in% - c( - "Pass", - "Not Reviewed" - )), ] - + is.na(.data$TADA.ResultMeasureValueDataTypes.Flag) | + (.data$TADA.ResultMeasureValueDataTypes.Flag %in% + c( + "NA - Not Available", + "Text", + "Non-ASCII Character(s)", + "Result Value/Unit Cannot Be Estimated From Detection Limit", + "Coerced to NA" + )) | + !(.data$TADA.ResultUnit.Flag %in% + c( + "Pass", + "Not Reviewed" + )) | + !(.data$TADA.SampleFraction.Flag %in% + c( + "Pass", + "Not Reviewed" + )) | + !(.data$TADA.MethodSpeciation.Flag %in% + c( + "Pass", + "Not Reviewed" + )), ] + # add flags noting these are not used in TN/TP summation exclude_df <- exclude_df %>% dplyr::mutate(TADA.NutrientSummation.Flag = "Not used to calculate Total N or P.") %>% dplyr::mutate(TADA.ResultValueAggregation.Flag = "Not considered in max aggregation function") - + # # For function testing only # # Calculate the number of rows in each data frame # total_rows_data <- nrow(.data) @@ -475,18 +459,17 @@ TADA_CalculateTotalNP <- function(.data, # } 
else { # print("Test failed: The sum of rows in include_df and exclude_df does not equal the total rows in .data.") # } - + # bring in custom reference df if provided if (!missing(sum_ref)) { - ref_cols <- names(TADA_GetNutrientSummationRef()) - TADA_CheckColumns(sum_ref, expected_cols = ref_cols) + TADA_CheckColumns(sum_ref, names(TADA_GetNutrientSummationRef())) } else { sum_ref <- TADA_GetNutrientSummationRef() } - + # Get grouping cols for daily aggregation # create nutrient groups by site and date - + # # used to include depth as well. cm removed 8/6/25 # depths <- names(include_df)[grepl("DepthHeightMeasure", names(include_df))] # depths <- depths[grepl("TADA.", depths)] @@ -513,11 +496,11 @@ TADA_CalculateTotalNP <- function(.data, # "ActivityRelativeDepthName" # depths # does not make sense for daily aggregation of a max value. Use max value for day regardless of depth ) - + dat <- suppressMessages(TADA_AggregateMeasurements(include_df, - grouping_cols = grpcols, - agg_fun = daily_agg, - clean = FALSE + grouping_cols = grpcols, + agg_fun = daily_agg, + clean = FALSE )) # # for function review only # dat_subset <- dat %>% @@ -527,7 +510,7 @@ TADA_CalculateTotalNP <- function(.data, # "TADA.ActivityMediaName", # "TADA.ComparableDataIdentifier", # "TADA.ResultValueAggregation.Flag"))) - + # Add rows not selected back at end but do not include in TN/TP summation # Define the condition for rows to be added back condition <- paste0("Considered in ", daily_agg, " aggregation function but not selected") @@ -541,7 +524,7 @@ TADA_CalculateTotalNP <- function(.data, if (dim(dat_addback)[1] > 0) { dat_addback$TADA.NutrientSummation.Flag <- "Not used to calculate Total N or P." } - + # move forward with only max values selected for each grouping (dat_TNTP) # TADA.ResultValueAggregation.Flag should be "No aggregation needed" OR "Selected as max aggregate value" # no longer need "Considered in max aggregation function but not selected" @@ -557,11 +540,11 @@ TADA_CalculateTotalNP <- function(.data, message("There is no applicable data to calculate TN or TP. Returning data unchanged.") return(.data) } - + # join data to summation table and keep only those that match for summations sum_dat <- merge(dat_TNTP, sum_ref, all.x = TRUE) sum_dat <- subset(sum_dat, !is.na(sum_dat$NutrientGroup)) - + # # REMINDER FOR TADA TEAM: NEED TO ENSURE ALL COMBOS PRESENT IN TABLE # # for review only: what is not matching? # sum_dat_review <- sum_dat %>% @@ -573,7 +556,7 @@ TADA_CalculateTotalNP <- function(.data, # "TADA.ResultValueAggregation.Flag", # "NutrientGroup"))) %>% # dplyr::filter(is.na(NutrientGroup)) - + # If the join results in matching rows if (dim(sum_dat)[1] > 0) { thecols <- grpcols[!grpcols %in% c( @@ -583,22 +566,22 @@ TADA_CalculateTotalNP <- function(.data, "TADA.MethodSpeciationName", "TADA.ResultSampleFractionText" )] - + # create nutrient group ID's. 
sum_dat <- sum_dat %>% dplyr::group_by(dplyr::across(dplyr::all_of(thecols))) %>% dplyr::mutate(TADA.NutrientSummationGroup = dplyr::cur_group_id()) - + # bring in equations eqns <- utils::read.csv(system.file("extdata", - "NP_equations.csv", - package = "EPATADA" + "NP_equations.csv", + package = "EPATADA" )) - + # dataframe to hold results summeddata <- data.frame() grps <- vector() - + for (i in 1:length(unique(eqns$Nutrient))) { nut <- unique(eqns$Nutrient)[i] nutqns <- subset(eqns, eqns$Nutrient == nut) @@ -606,8 +589,8 @@ TADA_CalculateTotalNP <- function(.data, eqnum <- unique(nutqns$EQN)[j] eqn <- subset(nutqns, nutqns$EQN == eqnum)$SummationName nutrient <- ifelse(nut == "N", - "Total Nitrogen as N", - "Total Phosphorus as P" + "Total Nitrogen as N", + "Total Phosphorus as P" ) # for each equation, see if any groups contain all required subspecies, # and for each pick the variant with the lowest rank. @@ -619,7 +602,7 @@ TADA_CalculateTotalNP <- function(.data, dplyr::filter(all(eqn %in% SummationName)) %>% # this line ensures that ALL subspecies are present within an equation group, not just one or more dplyr::filter(SummationName %in% eqn) %>% dplyr::mutate(TADA.NutrientSummationEquation = paste0(unique(SummationName), collapse = " + ")) - + out <- out %>% dplyr::group_by(TADA.NutrientSummationGroup, SummationName) %>% dplyr::slice_min(SummationRank, with_ties = FALSE) @@ -629,12 +612,12 @@ TADA_CalculateTotalNP <- function(.data, grps <- c(grps, unique(out$TADA.NutrientSummationGroup)) } } - + # Convert speciation if needed summeddata$TADA.ResultMeasureValue <- ifelse(!is.na(summeddata$SummationSpeciationConversionFactor), summeddata$TADA.ResultMeasureValue * summeddata$SummationSpeciationConversionFactor, summeddata$TADA.ResultMeasureValue) summeddata$TADA.MethodSpeciationName <- ifelse(!is.na(summeddata$SummationSpeciationConversionFactor) & summeddata$nutrient == "Total Nitrogen as N", "AS N", summeddata$TADA.MethodSpeciationName) summeddata$TADA.MethodSpeciationName <- ifelse(!is.na(summeddata$SummationSpeciationConversionFactor) & summeddata$nutrient == "Total Phosphorus as P", "AS P", summeddata$TADA.MethodSpeciationName) - + # Get to total N or P totncols <- c(thecols, "TADA.NutrientSummationGroup", "TADA.NutrientSummationEquation") TotalN <- summeddata %>% @@ -665,7 +648,7 @@ TADA_CalculateTotalNP <- function(.data, TADA.ResultMeasureValueDataTypes.Flag = "TP estimated from one or more subspecies.", TADA.ResultValueAggregation.Flag = "Nutrient summation from selected aggregate values and values where no aggregation was needed." ) - + # If summation is zero....include anyway # Generate unique ResultIdentifier Totals <- plyr::rbind.fill(TotalN, TotalP) %>% @@ -676,7 +659,7 @@ TADA_CalculateTotalNP <- function(.data, dplyr::n() ) )) - + # Combine all data back into dat_TNTP and get rid of unneeded columns dat_TNTP_combined <- dat_TNTP %>% base::merge(summeddata, all.x = TRUE) %>% @@ -692,8 +675,8 @@ TADA_CalculateTotalNP <- function(.data, -NutrientGroup ) %>% dplyr::mutate(TADA.NutrientSummation.Flag = dplyr::if_else(is.na(TADA.NutrientSummation.Flag), - "Not used to calculate Total N or P.", - TADA.NutrientSummation.Flag + "Not used to calculate Total N or P.", + TADA.NutrientSummation.Flag )) # At end... 
summation complete at this point # Check if each data frame is not empty @@ -706,7 +689,7 @@ TADA_CalculateTotalNP <- function(.data, exclude_df_non_empty, dat_addback_non_empty ) - + # Filter rows based on specific conditions duplicates <- final_TNTP %>% dplyr::group_by(TADA.NutrientSummationGroup) %>% @@ -715,9 +698,9 @@ TADA_CalculateTotalNP <- function(.data, TADA.ResultMeasureValue[1] == TADA.ResultMeasureValue[2] ) %>% dplyr::filter(TADA.NutrientSummation.Flag == "New row added: Nutrient summation from one or more subspecies.") - + remove_list <- unique(duplicates$ResultIdentifier) - + # Filter the data frame complete_df <- final_TNTP %>% dplyr::filter(!ResultIdentifier %in% remove_list) @@ -726,19 +709,19 @@ TADA_CalculateTotalNP <- function(.data, dat_TNTP_non_empty <- if (nrow(dat_TNTP) > 0) dat_TNTP else NULL exclude_df_non_empty <- if (nrow(exclude_df) > 0) exclude_df else NULL dat_addback_non_empty <- if (nrow(dat_addback) > 0) dat_addback else NULL - + # Bind rows only if the data frames are not NULL complete_df <- dplyr::bind_rows( dat_TNTP_non_empty, exclude_df_non_empty, dat_addback_non_empty ) - + # if there are no data to sum complete_df$TADA.NutrientSummation.Flag <- "Not used to calculate Total N or P." message("No Total N or P subspecies exist in dataset. Returning input dataset with TADA.NutrientSummation.Flag set to 'Not used to calculate Total N or P'") } - + # order columns and return complete_df complete_df <- TADA_CreateComparableID(complete_df) complete_df <- TADA_OrderCols(complete_df) @@ -818,38 +801,36 @@ TADA_AggregateMeasurements <- function(.data, ), agg_fun = c("max", "min", "mean"), clean = FALSE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - + # check .data is data.frame and has required columns + TADA_CheckColumns(.data, grouping_cols) # Check if the input data frame is empty if (nrow(.data) == 0) { message("The entered data frame is empty. The function will not run.") return(NULL) # Exit the function early } - - TADA_CheckColumns(.data, grouping_cols) + agg_fun <- match.arg(agg_fun) - + # Find multiple values in groups ncount <- .data %>% dplyr::group_by(dplyr::across(dplyr::all_of(grouping_cols))) %>% dplyr::summarise(ncount = length(ResultIdentifier)) - + if (max(ncount$ncount) < 2) { message("TADA_AggregateMeasurements: No rows to aggregate.") return(.data) } else { dat <- merge(.data, ncount, all.x = TRUE) - + if (any(is.na(dat$TADA.ResultMeasureValue))) { "TADA_AggregateMeasurements: Your dataset contains one or more rows where TADA.ResultMeasureValue = NA. Recommend removing these rows before proceeding. Otherwise, the function will not consider NAs in its calculations." 
} - + dat$TADA.ResultValueAggregation.Flag <- ifelse(dat$ncount == 1, "No aggregation needed", paste0("Considered in ", agg_fun, " aggregation function but not selected")) multiples <- dat %>% dplyr::filter(ncount > 1) - + dat <- dat %>% dplyr::select(-ncount) - + if (agg_fun == "max") { out <- multiples %>% dplyr::group_by(dplyr::across(dplyr::all_of(grouping_cols))) %>% @@ -874,15 +855,15 @@ TADA_AggregateMeasurements <- function(.data, dplyr::mutate(ResultIdentifier = paste0("TADA-", ResultIdentifier)) dat <- plyr::rbind.fill(dat, out) } - + if (clean == TRUE) { dat <- subset(dat, !dat$TADA.ResultValueAggregation.Flag %in% c(paste0("Considered in ", agg_fun, " aggregation function but not selected"))) } - + dat <- TADA_CreateComparableID(dat) dat <- TADA_OrderCols(dat) message("Aggregation results:") message(table(dat$TADA.ResultValueAggregation.Flag)) return(dat) } -} \ No newline at end of file +} diff --git a/R/UnitConversions.R b/R/UnitConversions.R index 9e9a2f7f0..1dd36c108 100644 --- a/R/UnitConversions.R +++ b/R/UnitConversions.R @@ -404,7 +404,6 @@ TADA_ConvertResultUnits <- function(.data, ref = "tada", transform = TRUE) { "TADA.DetectionQuantitationLimitMeasure.MeasureValue", "TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode" ) - TADA_CheckColumns(.data, expected_cols) # list of conversion columns @@ -904,8 +903,19 @@ TADA_ConvertDepthUnits <- function(.data, # "ResultDepthHeightMeasure" # ), transform = TRUE) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") + # check .data is data.frame and has required columns + expected_cols <- c( + "ActivityDepthHeightMeasure.MeasureValue", + "ActivityDepthHeightMeasure.MeasureUnitCode", + "ActivityTopDepthHeightMeasure.MeasureValue", + "ActivityTopDepthHeightMeasure.MeasureUnitCode", + "ActivityBottomDepthHeightMeasure.MeasureValue", + "ActivityBottomDepthHeightMeasure.MeasureUnitCode", + "ResultDepthHeightMeasure.MeasureValue", + "ResultDepthHeightMeasure.MeasureUnitCode" + ) + TADA_CheckColumns(.data, expected_cols) + # check unit is character TADA_CheckType(unit, "character") # check unit argument for valid number of inputs (e.g., vector of character) @@ -936,19 +946,6 @@ TADA_ConvertDepthUnits <- function(.data, # check transform is boolean TADA_CheckType(transform, "logical") - # .data required columns - expected_cols <- c( - "ActivityDepthHeightMeasure.MeasureValue", - "ActivityDepthHeightMeasure.MeasureUnitCode", - "ActivityTopDepthHeightMeasure.MeasureValue", - "ActivityTopDepthHeightMeasure.MeasureUnitCode", - "ActivityBottomDepthHeightMeasure.MeasureValue", - "ActivityBottomDepthHeightMeasure.MeasureUnitCode", - "ResultDepthHeightMeasure.MeasureValue", - "ResultDepthHeightMeasure.MeasureUnitCode" - ) - TADA_CheckColumns(.data, expected_cols) - tadacols <- c( "TADA.ActivityDepthHeightMeasure.MeasureValue", "TADA.ActivityDepthHeightMeasure.MeasureUnitCode", diff --git a/R/Utilities.R b/R/Utilities.R index eed4d4484..3726650f5 100644 --- a/R/Utilities.R +++ b/R/Utilities.R @@ -216,9 +216,7 @@ TADA_CheckType <- function(arg, type, paramName = deparse(substitute(arg))) { #' @param expected_cols A vector of expected column names as strings #' @return Invisible `NULL` if all expected columns are present; otherwise, an error is thrown. 
TADA_CheckColumns <- function(.data, expected_cols) { - if (!inherits(.data, "data.frame")) { - stop("Input must be a dataframe.") - } + TADA_CheckType(.data, "data.frame", "Input object") # check .data is data.frame if (!is.vector(expected_cols) || !is.character(expected_cols)) { stop("Expected columns must be a character vector.") @@ -548,17 +546,14 @@ TADA_ConvertSpecialChars <- function(.data, col, percent.ave = TRUE, #' unique(df4$TADA.CharacteristicName) #' } TADA_SubstituteDeprecatedChars <- function(.data) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - + # check .data is data.frame and has required columns + TADA_CheckColumns(.data, c("CharacteristicName")) # Check if the input data frame is empty if (nrow(.data) == 0) { message("The entered data frame is empty. The function will not run.") return(NULL) # Exit the function early } - TADA_CheckColumns(.data, expected_cols = c("CharacteristicName")) - if ("TADA.CharacteristicName" %in% colnames(.data)) { .data <- .data } else { @@ -615,23 +610,20 @@ TADA_SubstituteDeprecatedChars <- function(.data) { #' #' @export TADA_CreateComparableID <- function(.data) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") - + # check .data is data.frame and has required columns + expected_cols = c( + "TADA.CharacteristicName", + "TADA.ResultSampleFractionText", + "TADA.MethodSpeciationName", + "TADA.ResultMeasure.MeasureUnitCode" + ) + TADA_CheckColumns(.data, expected_cols) # Check if the input data frame is empty if (nrow(.data) == 0) { message("The entered data frame is empty. The function will not run.") return(NULL) # Exit the function early } - TADA_CheckColumns(.data, - expected_cols = c( - "TADA.CharacteristicName", - "TADA.ResultSampleFractionText", - "TADA.MethodSpeciationName", - "TADA.ResultMeasure.MeasureUnitCode" - ) - ) .data$TADA.ComparableDataIdentifier <- paste(.data$TADA.CharacteristicName, .data$TADA.ResultSampleFractionText, @@ -671,27 +663,27 @@ TADA_FormatDelimitedString <- function(delimited_string, delimiter = ",") { #' Generate a Random Water Quality Portal (WQP) Dataset #' #' This function retrieves water quality data for a randomly selected period -#' within the past 20 years using `TADA_DataRetrieval`. It can be used to test -#' functions on random datasets. The function ensures that the returned dataset -#' contains at least 10 results. If the initial random dataset contains fewer -#' than 10 results, the function automatically queries another random dataset +#' within the past 20 years using `TADA_DataRetrieval`. It can be used to test +#' functions on random datasets. The function ensures that the returned dataset +#' contains at least 10 results. If the initial random dataset contains fewer +#' than 10 results, the function automatically queries another random dataset #' until the criteria are met. #' -#' @param number_of_days Numeric. Specifies the number of days for which data -#' will be queried. The default is 1, which queries data for a random two-day +#' @param number_of_days Numeric. Specifies the number of days for which data +#' will be queried. The default is 1, which queries data for a random two-day #' period (e.g., startDate = "2015-04-21", endDate = "2015-04-22"). #' Users can increase this number to retrieve data for more days. #' #' @param choose_random_state Boolean (TRUE or FALSE). Default is FALSE. -#' If FALSE, the function queries all available WQP data for the specified -#' number_of_days (national query). 
If TRUE, the function selects a random state +#' If FALSE, the function queries all available WQP data for the specified +#' number_of_days (national query). If TRUE, the function selects a random state #' and retrieves data only for that state. #' #' @param autoclean Boolean (TRUE or FALSE). Default is TRUE. #' If TRUE, the function applies `TADA_AutoClean` as part of the `TADA_DataRetrieval`. #' If FALSE, the function does not apply `TADA_AutoClean`. #' -#' @param max_attempts Numeric. Specifies the maximum number of attempts to +#' @param max_attempts Numeric. Specifies the maximum number of attempts to #' retrieve data if an error occurs. Default is 3. #' #' @return A data frame containing a random WQP dataset with at least 10 results, @@ -719,7 +711,7 @@ TADA_FormatDelimitedString <- function(delimited_string, delimiter = ",") { #' # within a randomly selected state without auto-cleaning #' random_data_state_no_clean <- TADA_RandomTestingData( #' number_of_days = 5, -#' choose_random_state = TRUE, +#' choose_random_state = TRUE, #' autoclean = FALSE #' ) #' } @@ -733,7 +725,7 @@ TADA_RandomTestingData <- function(number_of_days = 1, twenty_years_ago <- Sys.Date() - 20 * 365 random_start_date <- twenty_years_ago + sample(20 * 365, 1) end_date <- random_start_date + ndays - + # Determine if a random state should be selected if (state_choice) { load(system.file("extdata", "statecodes_df.Rdata", package = "EPATADA")) @@ -741,35 +733,38 @@ TADA_RandomTestingData <- function(number_of_days = 1, } else { state <- "null" } - + # Print the selected date range and state code print(list( startDate = as.character(random_start_date), endDate = as.character(end_date), statecode = state )) - + # Attempt to retrieve data, retrying if an error occurs attempt <- 1 while (attempt <= max_attempts) { - dat <- tryCatch({ - TADA_DataRetrieval( - startDate = as.character(random_start_date), - endDate = as.character(end_date), - statecode = state, - applyautoclean = ac, - ask = FALSE - ) - }, error = function(e) { - message("Attempt ", attempt, ": An error occurred - ", e$message) - return(NULL) # Return NULL to indicate failure - }) - + dat <- tryCatch( + { + TADA_DataRetrieval( + startDate = as.character(random_start_date), + endDate = as.character(end_date), + statecode = state, + applyautoclean = ac, + ask = FALSE + ) + }, + error = function(e) { + message("Attempt ", attempt, ": An error occurred - ", e$message) + return(NULL) # Return NULL to indicate failure + } + ) + # If data retrieval was successful, return the data if (!is.null(dat)) { return(dat) } - + # Increment attempt counter and try a new query attempt <- attempt + 1 random_start_date <- twenty_years_ago + sample(20 * 365, 1) @@ -778,12 +773,12 @@ TADA_RandomTestingData <- function(number_of_days = 1, state <- sample(statecodes_df$STUSAB, 1) } } - + # If all attempts fail, return an empty data frame message("Failed to retrieve data after ", max_attempts, " attempts.") return(data.frame()) } - + # Internal function to ensure dataset has at least 10 results verify_random_data <- function() { repeat { @@ -792,7 +787,7 @@ TADA_RandomTestingData <- function(number_of_days = 1, } return(df) } - + # Retrieve and return the verified dataset df <- verify_random_data() return(df) @@ -1649,7 +1644,8 @@ renameATTAINSCols <- function(.data, return_list = FALSE, format = "tada") { "sediment", "taste_color_and_odor", "temperature", "total_toxics", "toxic_inorganics", "toxic_organics", "trash", "turbidity", "cyclestatus", "orig_fid", "waterType", 
"xwalk_method", "xwalk_huc12_version", - "chlorine", "biotoxins") + "chlorine", "biotoxins" + ) # if return list equals TRUE, return the list of tada formatted column names if (return_list == TRUE & format == "tada") { @@ -1663,13 +1659,14 @@ renameATTAINSCols <- function(.data, return_list = FALSE, format = "tada") { # if return equals FALSE, proceed with renaming columns if (return_list == FALSE) { - # assign old and new name vectors based on format selected by user - old.names <- dplyr::case_when(format == "tada" ~ attains.orig, + old.names <- dplyr::case_when( + format == "tada" ~ attains.orig, format == "attains" ~ attains.tada ) - new.names <- dplyr::case_when(format == "tada" ~ attains.tada, + new.names <- dplyr::case_when( + format == "tada" ~ attains.tada, format == "attains" ~ attains.orig ) @@ -1678,11 +1675,11 @@ renameATTAINSCols <- function(.data, return_list = FALSE, format = "tada") { view <- data.table::setnames( - .data, - old = old.names, - new = new.names, - skip_absent = TRUE - ) + .data, + old = old.names, + new = new.names, + skip_absent = TRUE + ) # remove intermediate objects rm(attains.tada, attains.orig, old.names, new.names) diff --git a/R/autoClean.R b/R/autoClean.R index 1129d8ed6..730e5b3ae 100644 --- a/R/autoClean.R +++ b/R/autoClean.R @@ -128,10 +128,17 @@ #' # Run TADA_AutoClean #' Autocleaned_TADAProfile <- TADA_AutoClean(TADAProfile) #' } -#' +#' TADA_AutoClean <- function(.data) { - # check .data is data.frame - TADA_CheckType(.data, "data.frame", "Input object") + # check .data is data.frame and has required columns + expected_cols <- c( + "ActivityMediaName", "ResultMeasureValue", "ResultMeasure.MeasureUnitCode", + "CharacteristicName", "ResultSampleFractionText", "MethodSpeciationName", + "DetectionQuantitationLimitMeasure.MeasureUnitCode", "ResultDetectionConditionText", + "ResultIdentifier", "DetectionQuantitationLimitMeasure.MeasureValue", + "LatitudeMeasure", "LongitudeMeasure" + ) + TADA_CheckColumns(.data, expected_cols) # Check if the input data frame is empty if (nrow(.data) == 0) { @@ -144,18 +151,6 @@ TADA_AutoClean <- function(.data) { cols <- names(.data) .data <- .data %>% dplyr::mutate_at(cols, as.character) - # .data required columns - required_cols <- c( - "ActivityMediaName", "ResultMeasureValue", "ResultMeasure.MeasureUnitCode", - "CharacteristicName", "ResultSampleFractionText", "MethodSpeciationName", - "DetectionQuantitationLimitMeasure.MeasureUnitCode", "ResultDetectionConditionText", - "ResultIdentifier", "DetectionQuantitationLimitMeasure.MeasureValue", - "LatitudeMeasure", "LongitudeMeasure" - ) - - # check .data has required columns - TADA_CheckColumns(.data, required_cols) - # execute function after checks are passed diff --git a/tests/testthat/test-ATTAINSCrosswalk.R b/tests/testthat/test-ATTAINSCrosswalk.R index ba4bafed1..66ee6faea 100644 --- a/tests/testthat/test-ATTAINSCrosswalk.R +++ b/tests/testthat/test-ATTAINSCrosswalk.R @@ -9,7 +9,7 @@ testthat::test_that("TADA_CreateParamRef ", { auto_assign = "None", excel = FALSE ) - + suppressWarnings( use_param_ref_none <- TADA_CreateUseParamRef( test_dat, @@ -25,7 +25,7 @@ testthat::test_that("TADA_CreateParamRef ", { ATTAINS.ParameterName = unique(test_dat$TADA.ComparableDataIdentifier), ATTAINS.UseName = rep(c("example use_name1", "example use_name2"), length(unique(test_dat$TADA.ComparableDataIdentifier))) ) - + suppressWarnings( use_param_ref_none2 <- TADA_CreateUseParamRef( test_dat, @@ -36,12 +36,12 @@ testthat::test_that("TADA_CreateParamRef ", { excel = FALSE ) ) - + 
# A user supplied table when paramRef has no crosswalk should return 0 rows (empty data frame). # Check to make sure there are 7 columns, 0 rows testthat::expect_true( dim(use_param_ref_none)[1] == 0 && dim(use_param_ref_none)[2] == 7 && - dim(use_param_ref_none2)[1] == 0 && dim(use_param_ref_none2)[2] == 7 + dim(use_param_ref_none2)[1] == 0 && dim(use_param_ref_none2)[2] == 7 ) ################# param_ref_all <- TADA_CreateParamRef( @@ -50,7 +50,7 @@ testthat::test_that("TADA_CreateParamRef ", { auto_assign = "All", excel = FALSE ) - + suppressWarnings( use_param_ref_all <- TADA_CreateUseParamRef( test_dat, @@ -60,14 +60,14 @@ testthat::test_that("TADA_CreateParamRef ", { excel = FALSE ) ) - + # a user supplied table for a param_use will not populate TADA_CreateUseParamRef if paramRef crosswalk is left blank. user.supplied.uses.param <- data.frame( ATTAINS.OrganizationIdentifier = rep("MTDEQ", length(unique(param_ref_all$ATTAINS.ParameterName))), # we have assigned to example uses to each parameter ATTAINS.ParameterName = unique(param_ref_all$ATTAINS.ParameterName), ATTAINS.UseName = rep(c("example use_name1", "example use_name2"), length(unique(param_ref_all$ATTAINS.ParameterName))) ) - + suppressWarnings( use_param_ref_all2 <- TADA_CreateUseParamRef( test_dat, @@ -78,14 +78,14 @@ testthat::test_that("TADA_CreateParamRef ", { excel = FALSE ) ) - + # A user supplied table when paramRef is filled out when auto_assign = FALSE # should all reflect the user supplied crosswalk. Check all unique uses to make sure. # Check to make sure there are 7 columns, 0 rows testthat::expect_true( all(unique(use_param_ref_all2$ATTAINS.UseName) %in% unique(user.supplied.uses.param$ATTAINS.UseName)) && - dim(use_param_ref_all)[2] == 7 && - dim(use_param_ref_all2)[2] == 7 + dim(use_param_ref_all)[2] == 7 && + dim(use_param_ref_all2)[2] == 7 ) ################# param_ref_org <- TADA_CreateParamRef( @@ -94,12 +94,12 @@ testthat::test_that("TADA_CreateParamRef ", { auto_assign = "Org", excel = FALSE ) - + # check to make sure all rows contain equal number of rows and for equal number of unique ComparableDataIdentifiers testthat::expect_true( - length(unique(test_dat$TADA.ComparableDataIdentifier)) == nrow(param_ref_none) && + length(unique(test_dat$TADA.ComparableDataIdentifier)) == nrow(param_ref_none) && nrow(param_ref_none) == nrow(param_ref_all) && nrow(param_ref_all) == nrow(param_ref_org) - ) + ) }) # Test: Auto_assign criteria table should contain all unique TADA.Characteristics/TADA.ComparableDataIdentifier @@ -112,10 +112,10 @@ testthat::test_that("TADA_DefineCriteriaMethodology ", { displayUniqueId = FALSE, excel = FALSE # uncomment to run the excel file - #excel = TRUE, overwrite = TRUE + # excel = TRUE, overwrite = TRUE ) ) - + suppressWarnings( Criteria_autofill_w_uniqueID <- TADA_DefineCriteriaMethodology( test_dat, @@ -129,8 +129,7 @@ testthat::test_that("TADA_DefineCriteriaMethodology ", { ) # check to make sure all criteria table has same number of TADA.Characteristics/TADA.ComparableDataIdentifiers testthat::expect_true( - length(unique(test_dat$TADA.ComparableDataIdentifier)) == length(unique(Criteria_autofill_w_uniqueID$TADA.ComparableDataIdentifier)) && - length(unique(test_dat$TADA.CharacteristicName)) == length(unique(Criteria_autofill$TADA.CharacteristicName)) + length(unique(test_dat$TADA.ComparableDataIdentifier)) == length(unique(Criteria_autofill_w_uniqueID$TADA.ComparableDataIdentifier)) && + length(unique(test_dat$TADA.CharacteristicName)) == 
length(unique(Criteria_autofill$TADA.CharacteristicName)) ) }) - diff --git a/tests/testthat/test-CensoredDataSuite.R b/tests/testthat/test-CensoredDataSuite.R index 08f235fc4..2c9a249f4 100644 --- a/tests/testthat/test-CensoredDataSuite.R +++ b/tests/testthat/test-CensoredDataSuite.R @@ -14,15 +14,15 @@ test_that("TADA_IDCensoredData copies det lim values to result values if applica # Process the data with TADA_IDCensoredData copycheck1 <- TADA_IDCensoredData(copycheck) - + # Subset rows where the original result value is NA copycheck2 <- subset(copycheck1, subset = is.na(copycheck1$ResultMeasureValue)) - + # Skip the test if there are no rows in copycheck2 if (nrow(copycheck2) == 0) { skip("No rows in copycheck2; test skipped.") } - + # Validate the ResultMeasureValueDataTypes.Flag valid_flags <- c( "Result Value/Unit Copied from Detection Limit", @@ -30,18 +30,20 @@ test_that("TADA_IDCensoredData copies det lim values to result values if applica "NA - Not Available" ) expect_true(all(copycheck2$TADA.ResultMeasureValueDataTypes.Flag %in% valid_flags)) - + # Subset data where DetectionQuantitationLimitMeasure.MeasureValue is not NA copycheck_NAs <- subset(copycheck2, subset = !is.na( - copycheck2$TADA.DetectionQuantitationLimitMeasure.MeasureValue)) - + copycheck2$TADA.DetectionQuantitationLimitMeasure.MeasureValue + )) + # Skip the test if there are no rows in copycheck_NAs if (nrow(copycheck_NAs) == 0) { - skip("No rows in copycheck_NAs; test skipped.")} - + skip("No rows in copycheck_NAs; test skipped.") + } + # Check flags and result measure values for non-NA detection limit measure values - expect_true(all(copycheck_NAs$TADA.ResultMeasureValueDataTypes.Flag == - "Result Value/Unit Copied from Detection Limit")) + expect_true(all(copycheck_NAs$TADA.ResultMeasureValueDataTypes.Flag == + "Result Value/Unit Copied from Detection Limit")) }) test_that("TADA_IDCensoredData correctly handles specific text values such as ND", { diff --git a/tests/testthat/test-UnitConversions.R b/tests/testthat/test-UnitConversions.R index bfd1e8554..58b046dda 100644 --- a/tests/testthat/test-UnitConversions.R +++ b/tests/testthat/test-UnitConversions.R @@ -27,7 +27,7 @@ TADAProfile <- data.frame( test_that("TADA_CheckColumns catches non-dataframe input", { expect_error( TADA_CheckColumns("string", c("A", "B")), - "Input must be a dataframe." 
+ "Input object must be of class 'data.frame'" ) }) @@ -104,12 +104,12 @@ test_that("TADA_ConvertDepthUnits converts meters to m", { # and ResultMeasure.MeasureUnitCode test_that("TADA_CreateUnitRef output contains a row for each TADA.CharacteristicName, TADA.ResultMeasure.MeasureUnitCode, and ResultMeasure.MeasureUnitCode.", { testdat <- TADA_RandomTestingData(number_of_days = 2, choose_random_state = TRUE) - + # Skip the test if the data retrieval results in an empty data frame if (nrow(testdat) == 0) { skip("Data retrieval failed after multiple attempts, skipping the test.") } - + unit.ref <- TADA_CreateUnitRef(testdat) unit.ref <- unit.ref %>% dplyr::select( @@ -117,7 +117,7 @@ test_that("TADA_CreateUnitRef output contains a row for each TADA.Characteristic ResultMeasure.MeasureUnitCode ) %>% dplyr::distinct() - + unit.combs <- TADA_UniqueCharUnitSpeciation(testdat) unit.combs <- unit.combs %>% dplyr::select( @@ -125,7 +125,7 @@ test_that("TADA_CreateUnitRef output contains a row for each TADA.Characteristic ResultMeasure.MeasureUnitCode ) %>% dplyr::distinct() - + compare <- unit.ref %>% dplyr::anti_join(unit.combs) expect_true(nrow(compare) == 0) diff --git a/vignettes/ExampleMod3CriteriaMethodsAltOptions.Rmd b/vignettes/ExampleMod3CriteriaMethodsAltOptions.Rmd index 53403dbdd..51c0f601d 100644 --- a/vignettes/ExampleMod3CriteriaMethodsAltOptions.Rmd +++ b/vignettes/ExampleMod3CriteriaMethodsAltOptions.Rmd @@ -264,9 +264,9 @@ found. ```{r auto-fill-epa304a-and-display-TADA.ComparableDataIdentifier} MT.Criteria_autofill_w_uniqueID <- TADA_DefineCriteriaMethodology( tada.MT.clean, - org_id = "MTDEQ", - auto_assign = TRUE, - displayUniqueId = TRUE, + org_id = "MTDEQ", + auto_assign = TRUE, + displayUniqueId = TRUE, excel = FALSE # uncomment to run the excel file # excel = TRUE, overwrite = TRUE @@ -296,8 +296,8 @@ cycle. MT.Criteria_user_supplied_autofill <- TADA_DefineCriteriaMethodology( .data = tada.MT.clean, criteriaMethods = criteria_table, # user supplied table - all rows are kept from this table - org_id = "MTDEQ", - useAURef = Data_MT_UseAURef, + org_id = "MTDEQ", + useAURef = Data_MT_UseAURef, displayUniqueId = FALSE, epa304a = TRUE, excel = FALSE @@ -321,7 +321,7 @@ load(system.file("extdata", "criteria_table.rda", package = "EPATADA")) MT.Criteria_user_supplied_autofill2 <- TADA_DefineCriteriaMethodology( .data = tada.MT.clean, criteriaMethods = criteria_table, # user supplied table - all rows are kept from this table - org_id = "MTDEQ", + org_id = "MTDEQ", displayUniqueId = TRUE, # will display all unique TADA.ComparableDataIdentifier in this table. epa304a = TRUE, excel = FALSE diff --git a/vignettes/ExampleMod3Workflow.Rmd b/vignettes/ExampleMod3Workflow.Rmd index 5e3bf0a2c..4ce2ba07d 100644 --- a/vignettes/ExampleMod3Workflow.Rmd +++ b/vignettes/ExampleMod3Workflow.Rmd @@ -177,29 +177,32 @@ data set from Montana. Get bacteria and pH data from Missoula County, Montana. 
```{r tada-data} -# get MT data +# get MT data tada.MT <- TADA_DataRetrieval( - startDate = "2020-01-01", - endDate = "2022-12-31", - statecode = "MT", - characteristicName = c("Escherichia", - "Escherichia coli", - "pH"), - countycode = "Missoula County", - ask = FALSE) + startDate = "2020-01-01", + endDate = "2022-12-31", + statecode = "MT", + characteristicName = c( + "Escherichia", + "Escherichia coli", + "pH" + ), + countycode = "Missoula County", + ask = FALSE +) # clean up data set (minimal) - tada.MT.clean <- tada.MT %>% - TADA_RunKeyFlagFunctions() %>% - TADA_SimpleCensoredMethods() %>% - TADA_HarmonizeSynonyms() - +tada.MT.clean <- tada.MT %>% + TADA_RunKeyFlagFunctions() %>% + TADA_SimpleCensoredMethods() %>% + TADA_HarmonizeSynonyms() + # remove intermediate objects - rm(tada.MT) - +rm(tada.MT) + # if you cannot run TADA_DataRetrieval query, the example data set can be loaded by uncommenting the code below - - # tada.MT.clean <- Data_MT_MissoulaCounty + +# tada.MT.clean <- Data_MT_MissoulaCounty ``` This example will focus only on Montana. The remainder of this vignette @@ -382,7 +385,7 @@ ParamRef <- MT.ParamRef_None %>% dplyr::mutate(ATTAINS.ParameterName = dplyr::case_when( grepl("PH_NONE_NONE_NONE", TADA.ComparableDataIdentifier) ~ "PH", grepl("ESCHERICHIA COLI", TADA.ComparableDataIdentifier) ~ "ESCHERICHIA COLI (E. COLI)")) %>% - dplyr::bind_rows(data.frame(TADA.ComparableDataIdentifier = "PH_NONE_NONE_NONE", ATTAINS.ParameterName = "PH, HIGH", ATTAINS.OrganizationIdentifier = "MTDEQ")) + dplyr::bind_rows(data.frame(TADA.ComparableDataIdentifier = "PH_NONE_NONE_NONE", ATTAINS.ParameterName = "PH, HIGH", ATTAINS.OrganizationIdentifier = "MTDEQ")) MT.ParamRef_Manual <- TADA_CreateParamRef( tada.MT.clean, @@ -442,7 +445,7 @@ MT.ParamRef_Final <- TADA_CreateParamRef( # excel = TRUE, overwrite = TRUE ) -# Test if the two data frames are same or not. +# Test if the two data frames are same or not. identical(MT.ParamRef_Final[1:4], MT.ParamRef_user_supplied[1:4]) TADA_TableExport(MT.ParamRef_Final) @@ -578,13 +581,13 @@ rExpertQuery to assign any new uses to parameters. ```{r useParamRef-with-useAURef} MT.UseParamRef_with_useAURef <- TADA_CreateUseParamRef( - tada.MT.clean, - org_id = c("MTDEQ"), - paramRef = MT.ParamRef_Final, - useAURef = MT.UseAURef_with_WaterUseRef, # uses will come from the user supplied reference table produced in ExampleMod2Workflow.Rmd - auto_assign = FALSE, - excel = FALSE) - + tada.MT.clean, + org_id = c("MTDEQ"), + paramRef = MT.ParamRef_Final, + useAURef = MT.UseAURef_with_WaterUseRef, # uses will come from the user supplied reference table produced in ExampleMod2Workflow.Rmd + auto_assign = FALSE, + excel = FALSE +) ``` ### Provide a User Supplied useParamRef @@ -658,7 +661,7 @@ MT.UseParamRef_Final <- TADA_CreateUseParamRef( # excel = TRUE, overwrite = TRUE ) -# Test if the two data frames are same or not. +# Test if the two data frames are same or not. 
identical(MT.UseParamRef_Final[1:5], MT.UseParamRef_user_supplied_Edit[1:5]) TADA_TableExport(MT.useParamRef_Final) @@ -698,11 +701,11 @@ data, please choose displayNA = TRUE ```{r ML_Summary} MT.MLSummaryRef_ML <- TADA_CreateMLSummaryRef( - .data = tada.MT.clean, + .data = tada.MT.clean, useParamRef = MT.UseParamRef_Final, org_id = "MTDEQ", displayNA = FALSE - ) +) ``` ## Define Spatial Summary by Monitoring Location (AU) @@ -724,13 +727,13 @@ for sites with no WQP data, please choose displayNA = TRUE ```{r AU_Summary, eval = FALSE} MT.MLSummaryRef_AU <- TADA_CreateMLSummaryRef( - .data = tada.MT.clean, - useParamRef = MT.UseParamRef_Final, + .data = tada.MT.clean, + useParamRef = MT.UseParamRef_Final, useAURef = MT.UseAURef_with_WaterUseRef, # uses will come from the user supplied reference table produced in ExampleMod2Workflow.Rmd AUMLRef = Final.MT.AUMLRef, org_id = "MTDEQ" # displayNA = FALSE - ) +) ``` ## Assign site-specific spatial criteria @@ -746,8 +749,8 @@ MT.MLSummaryRef_AU2 <- MT.MLSummaryRef_AU %>% dplyr::mutate( UniqueSpatialCriteria = dplyr::case_when( MonitoringLocationIdentifier == "MTVOLWQM_WQX-CLEARWATERR_1" ~ "Example Site Specific" - ) ) + ) ``` Compare the nrow for each of the ML versus AU level of summary. The AU @@ -785,7 +788,7 @@ MT.CriteriaMethods <- TADA_DefineCriteriaMethodology( org_id = "MTDEQ", MLSummaryRef = MT.MLSummaryRef_ML, excel = FALSE - #excel = TRUE, overwrite = TRUE + # excel = TRUE, overwrite = TRUE ) ``` @@ -855,12 +858,15 @@ in the remaining blank PH magnitude values with a range between 6.5 and ```{r finalize-criteria-table} # We will fill in PH magnitude values for this example -MT.CriteriaMethods_Final <- MT.CriteriaMethods_User_Supplied3 %>% dplyr::mutate(MagnitudeValueLower = dplyr::case_when( +MT.CriteriaMethods_Final <- MT.CriteriaMethods_User_Supplied3 %>% + dplyr::mutate(MagnitudeValueLower = dplyr::case_when( grepl("PH_NONE_NONE_NONE", TADA.ComparableDataIdentifier) & ATTAINS.OrganizationIdentifier == "MTDEQ" ~ 6.5, - TRUE ~ MagnitudeValueLower)) %>% - dplyr::mutate(MagnitudeValueUpper = dplyr::case_when( + TRUE ~ MagnitudeValueLower + )) %>% + dplyr::mutate(MagnitudeValueUpper = dplyr::case_when( grepl("PH_NONE_NONE_NONE", TADA.ComparableDataIdentifier) & ATTAINS.OrganizationIdentifier == "MTDEQ" ~ 8.5, - TRUE ~ MagnitudeValueUpper)) + TRUE ~ MagnitudeValueUpper + )) ``` We will supply this final crosswalk table to be reused and validated. 
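One way to persist the finalized criteria table for reuse in a later assessment cycle (a sketch only: `TADA_TableExport()` is assumed to accept this table as it does the other reference tables exported earlier in this vignette, and the `.rds` file name is illustrative):

```r
# Export the finalized criteria/methodology crosswalk for review
# (assumes TADA_TableExport() handles this table like the other reference tables)
TADA_TableExport(MT.CriteriaMethods_Final)

# Keep an .rds copy alongside the project for reproducibility (base R)
saveRDS(MT.CriteriaMethods_Final, file = "MT_CriteriaMethods_Final.rds")

# Reload it in a future session
MT.CriteriaMethods_Final <- readRDS("MT_CriteriaMethods_Final.rds")
```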
diff --git a/vignettes/TADACybertown2025.Rmd b/vignettes/TADACybertown2025.Rmd index f08791d9c..cec47b604 100644 --- a/vignettes/TADACybertown2025.Rmd +++ b/vignettes/TADACybertown2025.Rmd @@ -475,8 +475,9 @@ WQP_clean_subset <- WQP_clean_subset_spatial$TADA_with_ATTAINS View catchments and assessment units on map ```{r TADA_ViewATTAINS} -ATTAINS_map <- TADA_ViewATTAINS(WQP_clean_subset_spatial, - ref_icons = FALSE) +ATTAINS_map <- TADA_ViewATTAINS(WQP_clean_subset_spatial, + ref_icons = FALSE +) ATTAINS_map ``` diff --git a/vignettes/TADAModule2.Rmd b/vignettes/TADAModule2.Rmd index 9f2ebcc9d..7ff44a280 100644 --- a/vignettes/TADAModule2.Rmd +++ b/vignettes/TADAModule2.Rmd @@ -28,9 +28,10 @@ library(knitr) knitr::opts_chunk$set( echo = TRUE, warning = FALSE, - message = FALSE, + message = FALSE, eval = T, - dev.args = list(bg = "transparent")) + dev.args = list(bg = "transparent") +) ``` # Overview and Setup @@ -185,7 +186,7 @@ utils::data("Data_MT_MissoulaCounty", package = "EPATADA") # countycode = "Missoula County", # ask = FALSE # ) -# +# # # clean up data set (not comprehensive) # tada.MT.clean <- tada.MT %>% # TADA_RunKeyFlagFunctions() %>% @@ -298,7 +299,6 @@ user.supplied.cw <- clean.existing.attains.MT %>% MonitoringLocationIdentifier = ATTAINS.MonitoringLocationIdentifier, WaterType = ATTAINS.WaterType ) %>% - # Add an example new assessment unit for demonstration purposes dplyr::bind_rows(c( AssessmentUnitIdentifier = "NEW:EX_MDEQ_WQ_WQX", @@ -321,7 +321,8 @@ MT.AUMLRef <- TADA_CreateAUMLCrosswalk( fill_ATTAINS_catch = TRUE, fill_USGS_catch = TRUE, return_nearest = TRUE, - batch_upload = TRUE) + batch_upload = TRUE +) ``` This example is also included in the EPATADA R package, and can be @@ -438,9 +439,10 @@ intersect our observations: ```{r, eval = T} TADA_with_ATTAINS <- TADA_CreateATTAINSAUMLCrosswalk( - .data = tada.MT.clean, - return_sf = FALSE, - return_nearest = FALSE) + .data = tada.MT.clean, + return_sf = FALSE, + return_nearest = FALSE +) # Can also be performed on the spatial data: # TADA_with_ATTAINS <- TADA_CreateATTAINSAUMLCrosswalk(.data = TADA_spatial, return_sf = FALSE, return_nearest = TRUE) @@ -457,9 +459,10 @@ one ATTAINS assessment unit. ```{r, eval = T} TADA_with_ATTAINS_list <- TADA_CreateATTAINSAUMLCrosswalk( - .data = tada.MT.clean, - return_sf = TRUE, - return_nearest = TRUE) + .data = tada.MT.clean, + return_sf = TRUE, + return_nearest = TRUE +) # return only the closest ATTAINS AU for observations within a catchment with multiple AUs # TADA_with_ATTAINS_list <- TADA_CreateATTAINSAUMLCrosswalk(.data = TADA_spatial, return_sf = TRUE, return_nearest = TRUE) @@ -481,10 +484,11 @@ catchment information from the NHD by entering `fill_USGS_catch = TRUE`: ```{r, eval = T} TADA_with_ATTAINS_filled <- TADA_CreateATTAINSAUMLCrosswalk( - tada.MT.clean, - fill_USGS_catch = TRUE, - return_sf = TRUE, - return_nearest = TRUE) + tada.MT.clean, + fill_USGS_catch = TRUE, + return_sf = TRUE, + return_nearest = TRUE +) ``` When `fill_USGS_catch = TRUE`, the returned list splits observations @@ -632,8 +636,9 @@ prioritized steps: ```{r assign.use.to.AU} MT.UseAURef <- TADA_CreateUseAURef( - AUMLRef = Final.MT.AUMLRef, - org_id = "MTDEQ") + AUMLRef = Final.MT.AUMLRef, + org_id = "MTDEQ" +) ``` #### **Advanced: Assigning Uses to New AUs** @@ -650,11 +655,12 @@ For more information on how to customize this function to suit your needs, enter **`?TADA_CreateWaterUseRef`** into the R console. 
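Before wiring the organization's water use reference into `TADA_CreateUseAURef()` in the next chunk, it can help to preview what the reference contains; a minimal sketch using base R only (the object name `MT.WaterUseRef` is illustrative, not part of the vignette):

```r
# Illustrative preview of the MTDEQ water use reference before it is passed to
# TADA_CreateUseAURef(); the object name MT.WaterUseRef is hypothetical.
MT.WaterUseRef <- TADA_CreateWaterUseRef(org_id = "MTDEQ")

# Base R peek at the first rows and the column layout
head(MT.WaterUseRef)
str(MT.WaterUseRef)
```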
```{r assign.use.to.new.AU.with.waterUseRef} -MT.UseAURef_with_WaterUseRef <- +MT.UseAURef_with_WaterUseRef <- TADA_CreateUseAURef( waterUseRef = TADA_CreateWaterUseRef(org_id = "MTDEQ"), AUMLRef = Final.MT.AUMLRef, - org_id = "MTDEQ") + org_id = "MTDEQ" + ) ``` Users also have the option to manually assign use names to new AUs if