Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ export(QAPPDocAvailable)
export(QAPPapproved)
export(ResultValueSpecialCharacters)
export(UncommonAnalyticalMethodID)
export(WQXTargetUnits)
export(readWQPdataTADA)
4 changes: 3 additions & 1 deletion R/ResultFlagsDependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ DepthProfileData <- function(.data,
}
}
}
# check if any Conversion Factor columns were appended
if(all(is.na(match(c("ActDepth.Conversion.Factor",
"ActTopDepth.Conversion.Factor",
"ActBottomDepth.Conversion.Factor",
Expand Down Expand Up @@ -645,4 +646,5 @@ ResultValueSpecialCharacters <- function(.data, clean = FALSE){
stop("'clean' argument must be Boolean (TRUE or FALSE)")
}
}
}
}

58 changes: 30 additions & 28 deletions R/ResultFlagsIndependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#' @export
#'


UncommonAnalyticalMethodID <- function(.data, clean = FALSE){

# check that .data object is compatible with TADA
Expand Down Expand Up @@ -308,9 +307,9 @@ AboveNationalWQXUpperThreshold <- function(.data, clean = FALSE){


# check ResultMeasureValue column is of class numeric
if(class(.data$ResultMeasureValue) != "numeric") {
stop("The ResultMeasureValue column must of class 'numeric'.")
}
# if(class(.data$ResultMeasureValue) != "numeric") {
# stop("The ResultMeasureValue column must of class 'numeric'.")
# }

# execute function after checks are passed
if(all(c("CharacteristicName", "ActivityMediaName", "ResultMeasureValue",
Expand All @@ -330,30 +329,31 @@ AboveNationalWQXUpperThreshold <- function(.data, clean = FALSE){

# join unit.ref to raw.data
check.data <- merge(raw.data, unit.ref[, c("Characteristic", "Source",
"Value", "Value.Unit", "Maximum",
"Conversion.Factor")],
"Value", "Maximum")],
by.x = c("Char.Upper", "Media.Upper", "Unit.Upper"),
by.y = c("Characteristic", "Source", "Value"), all.x = TRUE)
# Run ResultValueSpecialCharacter function to remove special characters and convert class to numeric
check.data <- ResultValueSpecialCharacters(check.data, clean = TRUE)

# Convert result measure value to "Value.Unit" units
check.data$Converted.Value <-
check.data[,"ResultMeasureValue"]/check.data[,"Conversion.Factor"]
# remove upper case columns
check.data <- select(check.data, -c("Char.Upper", "Media.Upper", "Unit.Upper"))
# Run WQXTargetUnits function to convert ResultMeasureValue class to numeric
check.data <- WQXTargetUnits(check.data, convert = TRUE)

# Create flag column, flag rows where Converted.Value > Maximum
# Create flag column, flag rows where ResultMeasureValue > Maximum
flag.data <- check.data %>%
# apply function row by row
dplyr::rowwise() %>%
# create flag column
dplyr::mutate(AboveWQXUpperThreshold = dplyr::case_when(
Converted.Value >= Maximum ~ as.character("Y"),
Converted.Value < Maximum ~ as.character("N")))
ResultMeasureValue >= Maximum ~ as.character("Y"),
ResultMeasureValue < Maximum ~ as.character("N")))

# remove extraneous columns, fix field names
flag.data <- flag.data %>%
dplyr::select(-c("Char.Upper", "Media.Upper", "Unit.Upper", "Value.Unit",
"Maximum", "Conversion.Factor", "Converted.Value"))
dplyr::select(-c("Maximum", "ResultUnitConversion", "ResultMeasureValue",
"ResultMeasure.MeasureUnitCode"))
# rename ResultMeasureValue.Original and ResultMeasureUnitCode.Original
flag.data <- flag.data %>%
dplyr::rename(ResultMeasureValue = ResultMeasureValue.Original) %>%
dplyr::rename(ResultMeasure.MeasureUnitCode = ResultMeasureUnitCode.Original)

# reorder column names to match .data
# get .data column names
Expand Down Expand Up @@ -418,7 +418,7 @@ BelowNationalWQXUpperThreshold <- function(.data, clean = FALSE){

# check ResultMeasureValue column is of class numeric
if(class(.data$ResultMeasureValue) != "numeric") {
stop("The ResultMeasureValue column must of class 'numeric'.")
stop("The ResultMeasureValue column must be of class 'numeric'.")
}

# execute function after checks are passed
Expand All @@ -439,16 +439,13 @@ BelowNationalWQXUpperThreshold <- function(.data, clean = FALSE){

# join unit.ref to raw.data
check.data <- merge(raw.data, unit.ref[, c("Characteristic", "Source",
"Value", "Value.Unit", "Minimum",
"Conversion.Factor")],
"Value", "Minimum")],
by.x = c("Char.Upper", "Media.Upper", "Unit.Upper"),
by.y = c("Characteristic", "Source", "Value"), all.x = TRUE)
# Run ResultValueSpecialCharacter function to remove special characters and convert class to numeric
check.data <- ResultValueSpecialCharacters(check.data, clean = TRUE)

# Convert result measure value to "Value.Unit" units
check.data$Converted.Value <-
check.data[,"ResultMeasureValue"]/check.data[,"Conversion.Factor"]
# remove upper case columns
check.data <- select(check.data, -c("Char.Upper", "Media.Upper", "Unit.Upper"))
# Run WQXTargetUnits function to convert ResultMeasureValue class to numeric
check.data <- WQXTargetUnits(check.data, convert = TRUE)

# Create flag column, flag rows where Converted.Value < Minimum
flag.data <- check.data %>%
Expand All @@ -461,8 +458,12 @@ BelowNationalWQXUpperThreshold <- function(.data, clean = FALSE){

# remove extraneous columns, fix field names
flag.data <- flag.data %>%
dplyr::select(-c("Char.Upper", "Media.Upper", "Unit.Upper", "Value.Unit",
"Minimum", "Conversion.Factor", "Converted.Value"))
dplyr::select(-c("Maximum", "ResultUnitConversion", "ResultMeasureValue",
"ResultMeasure.MeasureUnitCode"))
# rename ResultMeasureValue.Original and ResultMeasureUnitCode.Original
flag.data <- flag.data %>%
dplyr::rename(ResultMeasureValue = ResultMeasureValue.Original) %>%
dplyr::rename(ResultMeasure.MeasureUnitCode = ResultMeasureUnitCode.Original)

# reorder column names to match .data
# get .data column names
Expand Down Expand Up @@ -739,6 +740,7 @@ InvalidCoordinates <- function(.data, clean_outsideUSA = FALSE, clean_imprecise
return (dplyr::select(.data, -InvalidCoordinates))
} else {
return (.data)}

}
else {
stop("clean argument must be Boolean (TRUE or FALSE)")
Expand Down
177 changes: 177 additions & 0 deletions R/Transformations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#' Convert Units to WQX Target Units
#'
#' *** placeholder text for function description
#'
#' @param .data TADA dataset
#' @param clean Boolean argument; removes data that is above the upper WQX
#' threshold from the dataset when clean = TRUE. Default is clean = FALSE.
#'
#' @return When clean = FALSE, a column flagging rows with data that are above
#' the upper WQX threshold is appended to the input data set. When clean = TRUE,
#' data that is above the upper WQX threshold is removed from the dataset.
#'
#' @export

WQXTargetUnits <- function(.data, convert = FALSE){

# check that .data object is compatible with TADA
# check .data is of class data.frame
if(("data.frame" %in% class(.data)) == FALSE) {
stop("Input object must be of class 'data.frame'")
}
# check .data has any of the required columns
if(all(c("CharacteristicName", "ActivityMediaName", "ResultMeasureValue",
"ResultMeasure.MeasureUnitCode") %in% colnames(.data)) == FALSE) {
stop("The dataframe does not contain the required fields to use TADA.
Use either the full physical/chemical profile downloaded from WQP or
download the TADA profile template available on the EPA TADA webpage.")
}

# execute function after checks are passed
if(all(c("CharacteristicName", "ActivityMediaName", "ResultMeasureValue",
"ResultMeasure.MeasureUnitCode") %in% colnames(.data)) == TRUE) {

# filter WQXcharVal.ref to include only valid CharacteristicUnit in water media
unit.ref <- TADA::WQXcharVal.ref %>%
dplyr::filter(Type == "CharacteristicUnit" & Source == "WATER" &
Status == "Valid")

# define raw.data
raw.data <- .data
# duplicate and capitalize CharName, ActivityMediaName, and ResultMeasureUnitCode columns in .data
raw.data$Char.Upper <- toupper(raw.data$CharacteristicName)
raw.data$Media.Upper <- toupper(raw.data$ActivityMediaName)
raw.data$Unit.Upper <- toupper(raw.data$ResultMeasure.MeasureUnitCode)

# join unit.ref to raw.data
check.data <- merge(raw.data, unit.ref[, c("Characteristic", "Source",
"Value", "Value.Unit",
"Conversion.Factor")],
by.x = c("Char.Upper", "Media.Upper", "Unit.Upper"),
by.y = c("Characteristic", "Source", "Value"), all.x = TRUE)
# rename columns
check.data <- check.data %>%
dplyr::rename(TargetUnit = Value.Unit) %>%
dplyr::rename(ConversionFactor = Conversion.Factor)

# if temp data exists, calculate conversion factor
if (all(is.na(match(c("deg F", "deg K"),
check.data$ResultMeasure.MeasureUnitCode))) == FALSE) {

# create numeric result value column (for calculations)
check.data$ResultMeasureValue.Numeric <- suppressWarnings(
as.numeric(check.data$ResultMeasureValue))

# Calculate deg F and deg C, replace Conversion factor values
check.data <- check.data %>%
# apply function row by row
dplyr::rowwise() %>%
# create flag column
dplyr::mutate(ConversionFactor = dplyr::case_when(
ResultMeasure.MeasureUnitCode == "deg F" ~
as.numeric(((ResultMeasureValue.Numeric - 32)*(5/9))/ResultMeasureValue.Numeric),
ResultMeasure.MeasureUnitCode == "deg K" ~
as.numeric((ResultMeasureValue.Numeric - 273.15)/ResultMeasureValue.Numeric),
TRUE ~ ConversionFactor))

# remove numeric result column
check.data <- dplyr::select(check.data, -ResultMeasureValue.Numeric)
}

# add ResultUnitConversion column
flag.data <- check.data %>%
# apply function row by row
dplyr::rowwise() %>%
# create flag column
dplyr::mutate(ResultUnitConversion = dplyr::case_when(
!is.na(TargetUnit) ~ as.character("OK"),
(is.na(TargetUnit) & is.na(ResultMeasureValue)) ~ as.character("NoResultValue"),
is.na(TargetUnit) ~ as.character("NoTargetUnit")))

# remove extraneous columns
flag.data <- flag.data %>%
dplyr::select(-c("Char.Upper", "Media.Upper", "Unit.Upper"))

if(convert == FALSE) {

# reorder column names to match .data
# get .data column names
col.order <- colnames(.data)
# add flag columns to the list
col.order <- append(col.order, c("TargetUnit",
"ConversionFactor", "ResultUnitConversion"))
# reorder columns in flag.data
flag.data <- flag.data[, col.order]

warning("Conversion required for range checks and TADATargetUnit conversions.
Unit conversions and data summaries and calculations may be affected")
return(flag.data)
}

if(convert == TRUE) {

# copy ResultMeasureValue to ResultMeasureValue.Original
flag.data$ResultMeasureValue.Original <- flag.data$ResultMeasureValue

# convert original result measure value to numeric, if it's not already
if(class(flag.data$ResultMeasureValue.Original) != "numeric") {
# convert result measure value class to numeric
flag.data$ResultMeasureValue.Original <- suppressWarnings(
as.numeric(flag.data$ResultMeasureValue.Original))
}

# Convert result measure value to Target Unit only if target unit exists
flag.data <- flag.data %>%
# apply function row by row
dplyr::rowwise() %>%
# apply conversions where there is a target unit, use original value if no target unit
dplyr::mutate(ResultMeasureValue = dplyr::case_when(
!is.na(TargetUnit) ~
(ResultMeasureValue.Original * ConversionFactor),
is.na(TargetUnit) ~ ResultMeasureValue.Original))

# rename ResultMeasure.MeasureUnitCode to ResultMeasureUnitCode.Original and Target Unit to ResultMeasure.MeasureUnitCode
flag.data <- flag.data %>%
dplyr::rename(ResultMeasureUnitCode.Original = ResultMeasure.MeasureUnitCode)

# populate ResultMeasure.MeasureUnitCode
flag.data <- flag.data %>%
# apply function row by row
dplyr::rowwise() %>%
# use target unit where there is a target unit, use original unit if no target unit
dplyr::mutate(ResultMeasure.MeasureUnitCode = dplyr::case_when(
!is.na(TargetUnit) ~ TargetUnit,
is.na(TargetUnit) ~ ResultMeasureUnitCode.Original))
# capitalize ResultMeasure.MeasureUnitCode column for consistency
flag.data$ResultMeasure.MeasureUnitCode <- toupper(flag.data$ResultMeasure.MeasureUnitCode)

# edit ResultUnitConversion column
clean.data <- flag.data %>%
# apply function row by row
dplyr::rowwise() %>%
# create flag column
dplyr::mutate(ResultUnitConversion = dplyr::case_when(
!is.na(ResultMeasure.MeasureUnitCode) ~ as.character("Converted"),
TRUE ~ ResultUnitConversion))

# remove extraneous columns, fix field names
clean.data <- clean.data %>%
dplyr::select(-c("ConversionFactor", "TargetUnit"))

# reorder column names to match .data
# get .data column names
col.order <- colnames(.data)
# add PotentialDupRowID column to the list
col.order <- append(col.order, c("ResultMeasureValue.Original",
"ResultMeasureUnitCode.Original",
"ResultUnitConversion"))
# reorder columns in clean.data
clean.data <- clean.data[, col.order]

return(clean.data)

} else {
stop("convert argument must be Boolean (TRUE or FALSE)")
}
}
}
3 changes: 3 additions & 0 deletions man/InvalidCoordinates.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/WQXTargetUnits.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions test_data/test_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ library(stringr)
#https://www.waterqualitydata.us/data/Project/search?statecode=US%3A24&siteType=Lake%2C%20Reservoir%2C%20Impoundment&siteType=Stream&sampleMedia=water&sampleMedia=Water&startDateLo=01-01-2019&startDateHi=01-01-2022&mimeType=csv&zip=yes&providers=NWIS&providers=STEWARDS&providers=STORET

# Set query parameters ####
WQPquery <- list(statecode = "US:24", Sitetype = c(
"Lake, Reservoir, Impoundment", "Stream"), Samplemedia = c("water", "Water"),
characteristicName = c("Dissolved oxygen (DO)", "pH"),
startDate = "01-01-2019", endDate = "01-01-2022")
WQPquery <- list(statecode = "US:24",
Sitetype = c("Lake, Reservoir, Impoundment", "Stream"),
Samplemedia = c("water", "Water"),
characteristicName = c("Dissolved oxygen (DO)", "pH", "Temperature, water"),
startDate = "01-01-2019",
endDate = "01-01-2022")

# Retrieve all 3 profiles ####
results.DR <- readWQPdata(WQPquery)
Expand Down