Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 56 additions & 20 deletions R/TADA_AutoClean.R
Original file line number Diff line number Diff line change
@@ -1,37 +1,73 @@

library(dplyr)

#' TADA Auto Clean
#'
#' Removes exact duplicate rows from a TADA dataset, then either removes or
#' flags (a) rows that are duplicates of one another once the excluded
#' identifier/comment fields are ignored and (b) rows whose
#' `ResultDetectionConditionText` is "Reported in Raw Data (attached)".
#'
#' @param .data TADA dataset
#' @param FlaggedData Boolean argument indicating whether output will have
#'   columns appended to flag data (`TRUE`, the default) or the output will be
#'   a cleaned dataset (`FALSE`).
#'
#' @return When `FlaggedData` is `FALSE`, the TADA dataset with duplicate
#'   records and continuous data removed. When `FlaggedData` is `TRUE`, the
#'   dataset with exact duplicate rows removed and two columns appended:
#'   `Duplicate.2` (1 when the row shares all non-excluded field values with
#'   another row, otherwise 0) and `ContDataFlag` (1 for continuous-data rows,
#'   `NA` otherwise).
#' @export
#'
#' @examples WQP.QCed <- TADAautoClean(WQP.raw)

TADAautoClean <- function(.data, FlaggedData = TRUE){

  # Validate the input once; TADAprofileCheck() returns a single TRUE/FALSE.
  if (!TADAprofileCheck(.data)) {
    stop("The dataframe does not contain the required fields to use TADA. Use either the full physical/chemical profile downloaded from WQP or download the TADA profile template available on the EPA TADA webpage.")
  }

  # Reject NA, NULL, or multi-element values up front so the caller sees the
  # intended message instead of a cryptic error raised by `if`.
  valid.flag <- length(FlaggedData) == 1 && !is.na(FlaggedData) &&
    (FlaggedData == TRUE || FlaggedData == FALSE)
  if (!valid.flag) {
    stop("FlaggedData argument must be Boolean (TRUE or FALSE)")
  }

  # Fields ignored when looking for type 2 duplicates (rows that differ only
  # in these identifier/comment fields).
  field.names <- colnames(.data)
  excluded.fields <- c("ActivityIdentifier", "ActivityConductingOrganizationText",
                       "OrganizationFormalName", "OrganizationIdentifier",
                       "ProjectIdentifier", "ResultCommentText", "ActivityCommentText")
  dupe.fields <- field.names[!field.names %in% excluded.fields]

  # Remove type 1 duplicates (rows identical in every field); done in both modes.
  unique.data <- .data[!duplicated(.data), ]

  if (FlaggedData == FALSE) {
    # Remove type 2 duplicates, keeping the first occurrence
    clean.data <- unique.data[!duplicated(unique.data[dupe.fields]), ]
    # Remove continuous data; rows with a missing condition text are kept
    clean.data <- dplyr::filter(clean.data,
                                ResultDetectionConditionText != "Reported in Raw Data (attached)" |
                                  is.na(ResultDetectionConditionText))

    return(clean.data)
  }

  flag.data <- unique.data
  # Flag type 2 duplicates: scanning from both ends marks every member of a
  # duplicate group, not only the later occurrences.
  flag.data$Duplicate.2 <- as.integer(duplicated(flag.data[dupe.fields]) |
                                        duplicated(flag.data[dupe.fields],
                                                   fromLast = TRUE))
  # Flag continuous data: tag the continuous rows, then join the tag back.
  cont.data <- dplyr::filter(flag.data,
                             ResultDetectionConditionText == "Reported in Raw Data (attached)")
  cont.data$ContDataFlag <- 1
  # NOTE(review): merge() joins on every shared column and sorts on them by
  # default, so output row order may differ from the input - confirm that
  # callers do not rely on the original order.
  flag.data <- merge(flag.data, cont.data, all.x = TRUE)

  flag.data
}


#' Title
#' TADA Remove Empty Columns
#'
#' **Placeholder text for function description
#'
#' @param .data Full physical/chemical results dataset from WQP
#' @param .data TADA dataset
#'
#' @return Full physical/chemical results dataset without columns containing only NA values
#' @return Full TADA dataset without columns containing only NA values
#' @export
#'
#' @examples WQP.QCed <- TADAremoveEmptyColumns(WQP.raw)
Expand Down
61 changes: 61 additions & 0 deletions R/TADA_ProfileCheck.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#' TADA Profile Check
#'
#' This function checks if the column names in a dataframe include the TADA
#' profile fields. It is used at the beginning of TADA functions to ensure the
#' input data frame is suitable (i.e. is either the full physical/chemical
#' results profile downloaded from WQP or the TADA profile template downloaded
#' from the EPA TADA webpage.)
#'
#' @param .data A dataframe. Any object inheriting from class "data.frame" is
#'   accepted, including subclasses that carry additional class attributes.
#'
#' @return Boolean result indicating whether or not the input dataframe contains
#' all of the TADA profile fields. An error is raised if `.data` is not a
#' data frame.
#'
#' @examples TADAprofileCheck(df)

TADAprofileCheck <- function(.data){

  TADA.fields <- c("OrganizationIdentifier", "OrganizationFormalName",
                   "ActivityIdentifier", "ActivityTypeCode",
                   "ActivityMediaName", "ActivityMediaSubdivisionName",
                   "ActivityStartDate", "ActivityStartTime.Time",
                   "ActivityStartTime.TimeZoneCode", "ActivityEndDate",
                   "ActivityEndTime.Time", "ActivityEndTime.TimeZoneCode",
                   "ActivityDepthHeightMeasure.MeasureValue", "ActivityDepthHeightMeasure.MeasureUnitCode",
                   "ActivityDepthAltitudeReferencePointText", "ActivityTopDepthHeightMeasure.MeasureValue",
                   "ActivityTopDepthHeightMeasure.MeasureUnitCode", "ActivityBottomDepthHeightMeasure.MeasureValue",
                   "ActivityBottomDepthHeightMeasure.MeasureUnitCode", "ProjectIdentifier",
                   "ActivityConductingOrganizationText", "MonitoringLocationIdentifier",
                   "ActivityCommentText", "SampleAquifer",
                   "HydrologicCondition", "HydrologicEvent",
                   "SampleCollectionMethod.MethodIdentifier", "SampleCollectionMethod.MethodIdentifierContext",
                   "SampleCollectionMethod.MethodName", "SampleCollectionEquipmentName",
                   "ResultDetectionConditionText", "CharacteristicName",
                   "ResultSampleFractionText", "ResultMeasureValue",
                   "ResultMeasure.MeasureUnitCode", "MeasureQualifierCode",
                   "ResultStatusIdentifier", "StatisticalBaseCode",
                   "ResultValueTypeName", "ResultWeightBasisText",
                   "ResultTimeBasisText", "ResultTemperatureBasisText",
                   "ResultParticleSizeBasisText", "PrecisionValue",
                   "ResultCommentText", "USGSPCode",
                   "ResultDepthHeightMeasure.MeasureValue", "ResultDepthHeightMeasure.MeasureUnitCode",
                   "ResultDepthAltitudeReferencePointText", "SubjectTaxonomicName",
                   "SampleTissueAnatomyName", "ResultAnalyticalMethod.MethodIdentifier",
                   "ResultAnalyticalMethod.MethodIdentifierContext", "ResultAnalyticalMethod.MethodName",
                   "MethodDescriptionText", "LaboratoryName",
                   "AnalysisStartDate", "ResultLaboratoryCommentText",
                   "DetectionQuantitationLimitTypeName", "DetectionQuantitationLimitMeasure.MeasureValue",
                   "DetectionQuantitationLimitMeasure.MeasureUnitCode", "PreparationStartDate",
                   "ProviderName", "ActivityStartDateTime", "ActivityEndDateTime")

  # inherits() returns a single TRUE/FALSE even when the object carries
  # several classes; comparing class() with != yields one value per class,
  # which is not a valid `if` condition for subclassed data frames.
  if (!inherits(.data, "data.frame")) {
    stop("Input object must be of class 'data.frame'")
  }

  # TRUE only when every required field is present among the column names
  all(TADA.fields %in% colnames(.data))

}