diff --git a/R/ContinuousDataFunctions.R b/R/ContinuousDataFunctions.R
index d823b68d8..2396299c6 100644
--- a/R/ContinuousDataFunctions.R
+++ b/R/ContinuousDataFunctions.R
@@ -5,7 +5,7 @@
#' @description
#' Retrieves available metadata from USGS National Water Information System (NWIS) based on
#' different spatial queries: area of interest (AOI), specific sites, or state boundaries.
-#' Returns a spatial sf object containing continuous monitoring site information and
+#' Returns a spatial sf object containing continuous monitoring site information and
#' available parameters and statistics.
#' If no data is found, returns an empty sf object with appropriate column structure.
#'
@@ -15,7 +15,7 @@
#' @param statecode Character vector of two-letter state codes (e.g., c("CA", "OR")).
#' @param siteid Character vector of USGS site numbers.
#'
-#' @return An sf object containing NWIS continuous monitoring site summary
+#' @return An sf object containing NWIS continuous monitoring site summary
#' information including:
#' \itemize{
#' \item site_no: USGS site identification number
@@ -129,22 +129,20 @@ TADA_listNWIS <- function(aoi_sf = "null", statecode = "null", siteid = "null")
}
# Daily stats info grabber:
-
+
stats_table <- function() {
-
site_url <- "https://help.waterdata.usgs.gov/stat_code"
-
+
table <- rvest::read_html(site_url) %>%
rvest::html_nodes("table") %>%
rvest::html_table() %>%
.[[1]] %>%
dplyr::mutate(stat_cd = sprintf("%05d", `Statistic Type Code`)) %>%
dplyr::select(stat_cd, stat_type = `Statistic Type Description`)
-
+
return(table)
-
}
-
+
# Grab NWIS by an area of interest:
if ((unlist(aoi_sf)[1] != "null")) {
og_epsg <- sf::st_crs(aoi_sf)$epsg
@@ -388,7 +386,7 @@ TADA_listNWIS <- function(aoi_sf = "null", statecode = "null", siteid = "null")
) %>%
# Remove any duplicates if they exist (precautionary - they shouldn't!)
dplyr::distinct(., .keep_all = TRUE)
-
+
# If no data, return empty data frame
if (nrow(inventory) == 0) {
message("No daily USGS-NWIS data in specified query.")
@@ -401,7 +399,7 @@ TADA_listNWIS <- function(aoi_sf = "null", statecode = "null", siteid = "null")
#' Retrieve and tidy daily values from NWIS
#'
#' This function interfaces with the USGS National Water Information System (NWIS) to
-#' retrieve daily values (DV) water quality data using the TADA (Tools for Automated
+#' retrieve daily values (DV) water quality data using the TADA (Tools for Automated
#' Data Analysis) framework. Users can query data based on a spatial area of interest
#' (AOI), a vector of state abbreviations, or a vector of specific site ids, along
#' with relevant USGS parameter codes, statistics to return, and a date range.
@@ -434,16 +432,16 @@ TADA_listNWIS <- function(aoi_sf = "null", statecode = "null", siteid = "null")
#' dplyr::filter(NAME %in% c("Spokane", "Navajo Nation"))
#' sites_aoi_sf <- TADA_getNWIS(
#' aoi_sf = locs_sf,
-#' parameter_codes =
-#' c("00060", "00010"),
-#' start_date = "2020-01-01",
-#' end_date = "2020-01-31"
+#' parameter_codes =
+#' c("00060", "00010"),
+#' start_date = "2020-01-01",
+#' end_date = "2020-01-31"
#' )
#'
#' # Example 2: Query by specific site numbers
#' sites_specific <- TADA_getNWIS(
#' siteid = c("11530500", "11532500"),
-#' parameter_codes = c("00060", "00010"),
+#' parameter_codes = c("00060", "00010"),
#' start_date = "2020-01-01",
#' end_date = "2020-12-31"
#' )
@@ -452,7 +450,7 @@ TADA_listNWIS <- function(aoi_sf = "null", statecode = "null", siteid = "null")
#' nwis_data <- TADA_getNWIS(
#' statecode = c("RI", "CO"),
#' stat_codes = c("00001"),
-#' parameter_codes = c("00010"),
+#' parameter_codes = c("00010"),
#' start_date = "2020-01-01",
#' end_date = "2020-01-02"
#' )
diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R
index f1dc20c5a..177a1faf3 100644
--- a/R/DataDiscoveryRetrieval.R
+++ b/R/DataDiscoveryRetrieval.R
@@ -83,6 +83,7 @@
#' See https://www.waterqualitydata.us/Codes/project for options.
#' @param providers Leave blank to include all, or specify "STEWARDS", "STORET" (i.e., WQX), and/or
#' "NWIS". See https://www.waterqualitydata.us/Codes/providers for options.
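+#' @param bBox Bounding box (latitude and longitude extent) to query, given as a numeric vector of four decimal-degree values in the order c(xmin, ymin, xmax, ymax), i.e., western longitude, southern latitude, eastern longitude, northern latitude (e.g., bBox = c(-86.9736, 34.4883, -86.6135, 34.6562)). Leave blank to query without a bounding box.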
#' @param maxrecs Maximum number of records to query at once (i.e., without breaking into smaller
#' queries).
#' @param ask A logical value (TRUE or FALSE) indicating whether the user should be asked for approval before
@@ -165,6 +166,9 @@
#' endDate = "2023-12-31",
#' ask = FALSE
#' )
+#'
+#' bbox <- c(-86.9736, 34.4883, -86.6135, 34.6562)
+#' tada8 <- TADA_DataRetrieval(bBox = bbox)
#' }
#'
TADA_DataRetrieval <- function(startDate = "null",
@@ -184,6 +188,7 @@ TADA_DataRetrieval <- function(startDate = "null",
organization = "null",
project = "null",
providers = "null",
+ bBox = "null",
maxrecs = 350000,
ask = TRUE,
applyautoclean = TRUE) {
@@ -312,6 +317,14 @@ TADA_DataRetrieval <- function(startDate = "null",
} else if (providers != "null") {
WQPquery <- c(WQPquery, providers = providers)
}
+
+ # bbox
+ if (length(bBox) > 1) {
+ WQPquery <- c(WQPquery, bBox = list(bBox))
+ } else if (bBox != "null") {
+ WQPquery <- c(WQPquery, bBox = bBox)
+ }
+
# Organization
if (length(organization) > 1) {
WQPquery <- c(WQPquery, organization = list(organization))
@@ -744,6 +757,13 @@ TADA_DataRetrieval <- function(startDate = "null",
WQPquery <- c(WQPquery, providers = providers)
}
+ # bbox
+ if (length(bBox) > 1) {
+ WQPquery <- c(WQPquery, bBox = list(bBox))
+ } else if (bBox != "null") {
+ WQPquery <- c(WQPquery, bBox = bBox)
+ }
+
if (length(organization) > 1) {
WQPquery <- c(WQPquery, organization = list(organization))
} else if (organization != "null") {
diff --git a/R/ExampleData.R b/R/ExampleData.R
index 3bd941fc9..5deadfb8b 100644
--- a/R/ExampleData.R
+++ b/R/ExampleData.R
@@ -121,3 +121,14 @@ NULL
#' @usage data(Data_HUC8_02070004_Mod1Output)
#' @format A data frame with 84 rows and 161 variables
NULL
+
+#' cybertown
+#'
+#' See TADACybertown2025.Rmd in vignettes folder.
+#'
+#' @docType data
+#' @keywords data frame
+#' @name cybertown
+#' @usage data(cybertown)
+#' @format A data frame with 990 rows and 150 variables
+NULL
diff --git a/R/Figures.R b/R/Figures.R
index 40190dd90..ed8a6ed0b 100644
--- a/R/Figures.R
+++ b/R/Figures.R
@@ -647,7 +647,7 @@ TADA_FlaggedSitesMap <- function(.data) {
#'
#' @param .data TADA dataframe after running TADA.FindNearbySites.
#' @param dist_buffer Distance in m to show a radius around each site marker.
-#'
+#'
#'
#' @return A leaflet map that shows all sites in the dataframe that contain
#' flagged data in the form of near other sites - groups of sites that are spatially located within
@@ -668,61 +668,69 @@ TADA_FlaggedSitesMap <- function(.data) {
#' }
#'
TADA_NearbySitesMap <- function(.data, dist_buffer = 100) {
-
- if(c("TADA.NearbySiteGroup") %in% colnames(.data) == FALSE) {
+ if (c("TADA.NearbySiteGroup") %in% colnames(.data) == FALSE) {
.data <- TADA_FindNearbySites(.data)
}
.data <- .data %>%
dplyr::filter(!is.na(TADA.NearbySiteGroup)) %>%
- dplyr::mutate(LatitudeMeasure = as.numeric(LatitudeMeasure),
- LongitudeMeasure = as.numeric(LongitudeMeasure)) %>%
- dplyr::select(LongitudeMeasure, LatitudeMeasure, TADA.MonitoringLocationIdentifier,
- MonitoringLocationIdentifier, MonitoringLocationName, TADA.LatitudeMeasure,
- TADA.LongitudeMeasure, OrganizationIdentifier, TADA.NearbySiteGroup) %>%
- dplyr::distinct()
-
- icon.colors <- grDevices::rainbow(as.numeric(length(unique(.data$TADA.NearbySiteGroup))))
-
- pal <- leaflet::colorFactor(palette = icon.colors,
- domain = .data$TADA.NearbySiteGroup)
-
+ dplyr::mutate(
+ LatitudeMeasure = as.numeric(LatitudeMeasure),
+ LongitudeMeasure = as.numeric(LongitudeMeasure)
+ ) %>%
+ dplyr::select(
+ LongitudeMeasure, LatitudeMeasure, TADA.MonitoringLocationIdentifier,
+ MonitoringLocationIdentifier, MonitoringLocationName, TADA.LatitudeMeasure,
+ TADA.LongitudeMeasure, OrganizationIdentifier, TADA.NearbySiteGroup
+ ) %>%
+ dplyr::distinct()
+
+ icon.colors <- grDevices::rainbow(as.numeric(length(unique(.data$TADA.NearbySiteGroup))))
+
+ pal <- leaflet::colorFactor(
+ palette = icon.colors,
+ domain = .data$TADA.NearbySiteGroup
+ )
+
map <- leaflet::leaflet(.data) %>%
leaflet::addProviderTiles("Esri.WorldTopoMap", group = "World topo", options = leaflet::providerTileOptions(updateWhenZooming = FALSE, updateWhenIdle = TRUE)) %>%
leaflet.extras::addResetMapButton() # button to reset to initial zoom and lat/long
if (nrow(.data) > 0) {
- map <- map %>% leaflet::addCircleMarkers(~LongitudeMeasure,
- ~LatitudeMeasure,
- color = ~pal(TADA.NearbySiteGroup),
- opacity = 1,
- fillColor = ~pal(TADA.NearbySiteGroup),
- fillOpacity = 1,
- radius = ifelse(dist_buffer > 200,
- dist_buffer/10,
- 20),
- weight = 1,
- # label = ~as.character(TADA.MonitoringLocationIdentifier),
- popup = paste0(
- "Nearby Group Name: ", .data$TADA.MonitoringLocationIdentifier,
- "
Nearby Site Group: ", .data$TADA.NearbySiteGroup,
- "
Site ID: ", .data$MonitoringLocationIdentifier,
- "
Site Name: ", .data$MonitoringLocationName,
- "
Latitude: ", .data$LatitudeMeasure,
- "
Longitude: ", .data$LongitudeMeasure
- ),
- data = .data,
- clusterOptions = leaflet::markerClusterOptions(),
- ) %>%
+ map <- map %>%
+ leaflet::addCircleMarkers(~LongitudeMeasure,
+ ~LatitudeMeasure,
+ color = ~ pal(TADA.NearbySiteGroup),
+ opacity = 1,
+ fillColor = ~ pal(TADA.NearbySiteGroup),
+ fillOpacity = 1,
+ radius = ifelse(dist_buffer > 200,
+ dist_buffer / 10,
+ 20
+ ),
+ weight = 1,
+ # label = ~as.character(TADA.MonitoringLocationIdentifier),
+ popup = paste0(
+ "Nearby Group Name: ", .data$TADA.MonitoringLocationIdentifier,
+ "
Nearby Site Group: ", .data$TADA.NearbySiteGroup,
+ "
Site ID: ", .data$MonitoringLocationIdentifier,
+ "
Site Name: ", .data$MonitoringLocationName,
+ "
Latitude: ", .data$LatitudeMeasure,
+ "
Longitude: ", .data$LongitudeMeasure
+ ),
+ data = .data,
+ clusterOptions = leaflet::markerClusterOptions(),
+ ) %>%
leaflet::addCircles(~LongitudeMeasure,
- ~LatitudeMeasure,
- color = ~pal(TADA.NearbySiteGroup),
- opacity = 0.1,
- fillColor = ~pal(TADA.NearbySiteGroup),
- fillOpacity = 0.1,
- radius = dist_buffer,
- weight = 1)
+ ~LatitudeMeasure,
+ color = ~ pal(TADA.NearbySiteGroup),
+ opacity = 0.1,
+ fillColor = ~ pal(TADA.NearbySiteGroup),
+ fillOpacity = 0.1,
+ radius = dist_buffer,
+ weight = 1
+ )
}
-
+
return(map)
}
diff --git a/R/GeospatialFunctions.R b/R/GeospatialFunctions.R
index c5fc4d355..0e0268627 100644
--- a/R/GeospatialFunctions.R
+++ b/R/GeospatialFunctions.R
@@ -1118,80 +1118,80 @@ fetchNHD <- function(.data, resolution = "Hi", features = "catchments") {
#' TADA_GetATTAINS
#'
-#' Link catchment-based ATTAINS assessment unit data
-#' (EPA snapshot of NHDPlus HR catchments associated with entity submitted
-#' assessment unit features - points, lines, and polygons) to Water Quality
-#' Portal observations, often imported via `TADA_DataRetrieval()`. This
-#' function returns the objects that can be mapped in `TADA_ViewATTAINS()`.
-#' Check out the TADAModule2.Rmd for an example workflow. Note that
-#' approximately 80% of state submitted assessment units in ATTAINS were
+#' Link catchment-based ATTAINS assessment unit data
+#' (EPA snapshot of NHDPlus HR catchments associated with entity submitted
+#' assessment unit features - points, lines, and polygons) to Water Quality
+#' Portal observations, often imported via `TADA_DataRetrieval()`. This
+#' function returns the objects that can be mapped in `TADA_ViewATTAINS()`.
+#' Check out the TADAModule2.Rmd for an example workflow. Note that
+#' approximately 80% of state submitted assessment units in ATTAINS were
#' developed based on high res NHDPlus, so we are using that as the default.
#'
-#' The ATTAINS snapshot of NHDPlus HR catchments is not available for areas
+#' The ATTAINS snapshot of NHDPlus HR catchments is not available for areas
#' that do not have existing Assessment Units in ATTAINS. For these areas where
#' there are WQP sites, but no existing ATTAINS assessment units, a user can
#' choose to associate the WQP sites with NHDPlus catchments available from
-#' the USGS nhdplusTools package (please be aware that USGS and EPA ATTAINS
-#' snapshots of the NHDPlus catchments may vary) using the optional function
+#' the USGS nhdplusTools package (please be aware that USGS and EPA ATTAINS
+#' snapshots of the NHDPlus catchments may vary) using the optional function
#' param 'fill_catchments'. If desired by the user, the HR
-#' catchments could be created as new assessment unit polygons in ATTAINS
+#' catchments could be created as new assessment unit polygons in ATTAINS
#' (that process is outside of TADA).
#'
-#' `ResultIdentifier' identifies rows that are the same observation but are
-#' linked to multiple ATTAINS assessment units. It is possible for a single
-#' TADA WQP observation to have multiple ATTAINS assessment units linked to
+#' `ResultIdentifier` identifies rows that are the same observation but are
+#' linked to multiple ATTAINS assessment units. It is possible for a single
+#' TADA WQP observation to have multiple ATTAINS assessment units linked to
#' it and subsequently more than one row of data.
#'
#' If TADA_MakeSpatial has not yet been run, this function runs it which also
-#' adds another new column to the input dataframe, 'geometry', which allows
+#' adds another new column to the input dataframe, 'geometry', which allows
#' for mapping and additional geospatial capabilities.
#'
-#' Please review the output of this function carefully, especially waterbody
-#' intersections (tributaries), lake/ocean coasts, and other areas with
-#' complex hydrology where imprecise WQP monitoring location coordinates can
-#' be problematic. Note that many WQP locations will not fall within the bounds
-#' of NHDPlus (estuaries, oceans). Manual adjustments and quality control checks
-#' are strongly encouraged. WQP monitoring location metadata may also be helpful
-#' for matching waterbody names with ATTAINS waterbody names instead of relying
+#' Please review the output of this function carefully, especially waterbody
+#' intersections (tributaries), lake/ocean coasts, and other areas with
+#' complex hydrology where imprecise WQP monitoring location coordinates can
+#' be problematic. Note that many WQP locations will not fall within the bounds
+#' of NHDPlus (estuaries, oceans). Manual adjustments and quality control checks
+#' are strongly encouraged. WQP monitoring location metadata may also be helpful
+#' for matching waterbody names with ATTAINS waterbody names instead of relying
#' solely on the geospatial location (lat/long).
#'
-#' @param .data A dataframe created by `TADA_DataRetrieval()` or the sf
+#' @param .data A dataframe created by `TADA_DataRetrieval()` or the sf
#' equivalent made by `TADA_MakeSpatial()`.
-#' @param return_nearest If a WQP observation falls within more than one AU,
-#' return ONLY the nearest AU (return_nearest = TRUE), or all AUs
+#' @param return_nearest If a WQP observation falls within more than one AU,
+#' return ONLY the nearest AU (return_nearest = TRUE), or all AUs
#' (return_nearest = FALSE).
#' @param fill_catchments Whether the user would like to return NHD catchments
-#' (USGS snapshot of NHDPlus V2) for WQP observations not associated with an
-#' ATTAINS assessment unit (TRUE or FALSE). When fill_catchments = TRUE,
-#' the returned list splits observations into two dataframes: WQP observations
-#' with ATTAINS catchment data (EPA snapshot of NHDPlus V2), and WQP
+#' (USGS snapshot of NHDPlus V2) for WQP observations not associated with an
+#' ATTAINS assessment unit (TRUE or FALSE). When fill_catchments = TRUE,
+#' the returned list splits observations into two dataframes: WQP observations
+#' with ATTAINS catchment data (EPA snapshot of NHDPlus V2), and WQP
#' observations without ATTAINS catchment data. Defaults to FALSE.
#' @param resolution If fill_catchments = TRUE, whether to use NHDPlus V2 "Med"
-#' catchments or NHDPlus V2 HiRes "Hi" catchments. Default is NHDPlus V2 HiRes
-#' ("Hi") because at approximately 80% of state submitted assessment units in
+#' catchments or NHDPlus V2 HiRes "Hi" catchments. Default is NHDPlus V2 HiRes
+#' ("Hi") because approximately 80% of state submitted assessment units in
#' ATTAINS were developed based on NHDPlus V2 HiRes.
-#' @param return_sf Whether to return the ATTAINS associated catchments, lines,
-#' points, and polygon shapefile objects along with the data frame(s).
-#' TRUE (yes, return list) or FALSE (no, do not return). All shapefile features
-#' are in WGS84 (crs = 4326). If fill_catchments = TRUE and return_sf = TRUE,
-#' the function will additionally return the raw catchment features associated
-#' with the observations in TADA_without_ATTAINS in a new shapefile called
+#' @param return_sf Whether to return the ATTAINS associated catchments, lines,
+#' points, and polygon shapefile objects along with the data frame(s).
+#' TRUE (yes, return list) or FALSE (no, do not return). All shapefile features
+#' are in WGS84 (crs = 4326). If fill_catchments = TRUE and return_sf = TRUE,
+#' the function will additionally return the raw catchment features associated
+#' with the observations in TADA_without_ATTAINS in a new shapefile called
#' without_ATTAINS_catchments. Defaults to TRUE.
#'
-#' @return A modified `TADA_DataRetrieval()` dataframe or list with additional
-#' columns associated with the ATTAINS assessment unit data, and, if
-#' fill_catchments = TRUE, an additional dataframe of the observations without
+#' @return A modified `TADA_DataRetrieval()` dataframe or list with additional
+#' columns associated with the ATTAINS assessment unit data, and, if
+#' fill_catchments = TRUE, an additional dataframe of the observations without
#' intersecting ATTAINS features.
-#' Moreover, if return_sf = TRUE, this function will additionally return the
-#' raw ATTAINS and catchment shapefile features associated with those
-#' observations.
-#'
-#' This function calculates and reports the distance, 'TADA.DistanceAway.Meters',
-#' between each WQP observation and intersecting ATTAINS features within its
-#' catchment. A TADA.DistanceAway.Meters value of 0 indicates that the WQP
-#' observation is directly on the associated ATTAINS point or line feature,
+#' Moreover, if return_sf = TRUE, this function will additionally return the
+#' raw ATTAINS and catchment shapefile features associated with those
+#' observations.
+#'
+#' This function calculates and reports the distance, 'TADA.DistanceAway.Meters',
+#' between each WQP observation and intersecting ATTAINS features within its
+#' catchment. A TADA.DistanceAway.Meters value of 0 indicates that the WQP
+#' observation is directly on the associated ATTAINS point or line feature,
#' or located inside the associated ATTAINS polygon.
-#'
+#'
#' @seealso [TADA_DataRetrieval()]
#' @seealso [TADA_MakeSpatial()]
#' @seealso [TADA_ViewATTAINS()]
@@ -1209,7 +1209,7 @@ fetchNHD <- function(.data, resolution = "Hi", features = "catchments") {
#' ask = FALSE
#' )
#'
-#' # note: these example ATTAINS data retrieval queries below may take a long
+#' # note: these example ATTAINS data retrieval queries below may take a long
#' # time (10+ minutes) to run
#' tada_attains <- TADA_GetATTAINS(tada_data,
#' fill_catchments = FALSE,
diff --git a/R/RequiredCols.R b/R/RequiredCols.R
index be2ee39ec..9ade30d2f 100644
--- a/R/RequiredCols.R
+++ b/R/RequiredCols.R
@@ -3,6 +3,8 @@
# ordered list of TADA workflow required columns to be retained in dataframe
require.cols <- c(
+ "ResultIdentifier", # required
+
# Sample/Measurement Type (e.g. QC or Not)
"ActivityTypeCode", # required
"TADA.ActivityType.Flag", # generated
@@ -13,6 +15,28 @@ require.cols <- c(
"TADA.ActivityMediaName", # generated/required/replaces original
"ActivityMediaSubdivisionName", # filter
+ # Organization Monitoring Locations
+ "CountryCode",
+ "StateCode",
+ "CountyCode",
+ "MonitoringLocationName", # required
+ "TADA.MonitoringLocationName", # generated
+ "MonitoringLocationTypeName",
+ "TADA.MonitoringLocationTypeName", # generated
+ "MonitoringLocationDescriptionText",
+ "LatitudeMeasure",
+ "TADA.LatitudeMeasure", # generated
+ "LongitudeMeasure",
+ "TADA.LongitudeMeasure", # generated
+ "HorizontalCoordinateReferenceSystemDatumName",
+ "TADA.SuspectCoordinates.Flag", # generated
+ "HUCEightDigitCode",
+ "MonitoringLocationIdentifier", # required
+ "TADA.MonitoringLocationIdentifier",
+ "TADA.NearbySites.Flag", # generated,
+ "TADA.NearbySiteGroup", # generated
+ "TADA.DistanceAway.Meters", # generated
+
# Comparable Data Groups (e.g. Observable Properties)
"ResultSampleFractionText", # required in Module 1 but is replaced by TADA version in future modules
"TADA.ResultSampleFractionText", # generated/required/replaces original
@@ -145,7 +169,6 @@ require.cols <- c(
"SamplingDesignTypeCode",
"LaboratoryName",
"ResultLaboratoryCommentText",
- "ResultIdentifier", # required
"ActivityIdentifier", # required
# Organization (e.g. data submitter)
@@ -166,27 +189,6 @@ require.cols <- c(
"QAPPApprovalAgencyName",
"TADA.QAPPDocAvailable", # generated, based on ProjectFileUrl
- # Organization Monitoring Locations
- "CountryCode",
- "StateCode",
- "CountyCode",
- "MonitoringLocationName", # required
- "TADA.MonitoringLocationName", # generated
- "MonitoringLocationTypeName",
- "TADA.MonitoringLocationTypeName", # generated
- "MonitoringLocationDescriptionText",
- "LatitudeMeasure",
- "TADA.LatitudeMeasure", # generated
- "LongitudeMeasure",
- "TADA.LongitudeMeasure", # generated
- "HorizontalCoordinateReferenceSystemDatumName",
- "TADA.SuspectCoordinates.Flag", # generated
- "HUCEightDigitCode",
- "MonitoringLocationIdentifier", # required
- "TADA.MonitoringLocationIdentifier",
- "TADA.NearbySites.Flag", # generated,
- "TADA.NearbySiteGroup", # generated
-
# Groundwater fields, used for auto filtering for assessment use case but should not be required to have in TADA template
"AquiferName", # filter, groundwater
"AquiferTypeName", # filter
@@ -219,10 +221,10 @@ extra.cols <- c(
"AnalysisStartDate",
"ResultDetectionQuantitationLimitUrl",
"LabSamplePreparationUrl",
- "timeZoneStart", # no longer in default dataRetrieval profile? 11/7/24
- "timeZoneEnd", # no longer in default dataRetrieval profile? 11/7/24
- "ActivityStartTime.TimeZoneCode_offset", # new column from default dataRetrieval profile? 11/7/24
- "ActivityEndTime.TimeZoneCode_offset", # new column from default dataRetrieval profile? 11/21/24
+ "timeZoneStart", # no longer in default USGS dataRetrieval profile? 11/7/24
+ "timeZoneEnd", # no longer in default USGS dataRetrieval profile? 11/7/24
+ "ActivityStartTime.TimeZoneCode_offset", # new column from default USGS dataRetrieval profile? 11/7/24
+ "ActivityEndTime.TimeZoneCode_offset", # new column from default USGS dataRetrieval profile? 11/21/24
"SourceMapScaleNumeric",
"HorizontalAccuracyMeasure.MeasureValue",
"HorizontalAccuracyMeasure.MeasureUnitCode",
diff --git a/R/ResultFlagsIndependent.R b/R/ResultFlagsIndependent.R
index 1a8257746..359b2e9ed 100644
--- a/R/ResultFlagsIndependent.R
+++ b/R/ResultFlagsIndependent.R
@@ -1311,8 +1311,10 @@ TADA_FindPotentialDuplicatesMultipleOrgs <- function(.data, dist_buffer = 100,
org_hierarchy = "none") {
# from those datapoints, determine which are in adjacent sites
if (!"TADA.NearbySites.Flag" %in% names(.data)) {
- .data <- TADA_FindNearbySites(.data, dist_buffer = dist_buffer,
- org_hierarchy = org_hierarchy)
+ .data <- TADA_FindNearbySites(.data,
+ dist_buffer = dist_buffer,
+ org_hierarchy = org_hierarchy
+ )
}
dupsites <- unique(.data[, c(
diff --git a/R/Utilities.R b/R/Utilities.R
index 8791df3ba..0efd122cf 100644
--- a/R/Utilities.R
+++ b/R/Utilities.R
@@ -109,10 +109,10 @@ utils::globalVariables(c(
"MONITORING_DATA_LINK_TEXT", "PARCEL_NO", "TRIBE_NAME", "everything",
"resultCount", "tribal_area", "txtProgressBar", "Date", "NWIS.parameter",
"NWIS.status", "NWIS.value", "TADA.DistanceAway.Meters", "agency_cd begin_date",
- "cluster", "count", "count_nu", "data_type", "data_type_cd", "dec_lat_va",
- "dec_long_va", "end_date", "parameter_code", "parameter_name_description",
+ "cluster", "count", "count_nu", "data_type", "data_type_cd", "dec_lat_va",
+ "dec_long_va", "end_date", "parameter_code", "parameter_name_description",
"parm_cd site_no", "site_tp_cd", "site_type", "st_drop_geometry", "station_nm",
- "Statistic Type Code", "Statistic Type Description", "agency_cd", "begin_date",
+ "Statistic Type Code", "Statistic Type Description", "agency_cd", "begin_date",
"parm_cd", "site_no", "stat_cd", "stat_type", "grouped.sites", "n", "nearby", "rainbow"
))
@@ -609,70 +609,69 @@ TADA_FindNearbySites <- function(.data, dist_buffer = 100,
dist.matrix <- dist.matrix %>%
units::drop_units()
- rownames(dist.matrix) <- unique.mls$MonitoringLocationIdentifier
- colnames(dist.matrix) <- unique.mls$MonitoringLocationIdentifier
+ rownames(dist.matrix) <- unique.mls$MonitoringLocationIdentifier
+ colnames(dist.matrix) <- unique.mls$MonitoringLocationIdentifier
# convert distances to those within buffer (1) and beyond buffer (0)
- dist.mat1 <- apply(dist.matrix, c(1, 2), function(x) {
- if (x <= dist_buffer) {
- x <- 1
- } else {
- x <- 0
- }
- })
-
- # remove intermediate object
- rm(dist.matrix)
-
- # create adjacency graph
- adj.graph <- igraph::graph_from_adjacency_matrix(dist.mat1, mode = "undirected", diag = FALSE)
-
- # find connected sites
- comp.results <- igraph::components(adj.graph)
-
- # create site group dfs
- group.sites <- data.frame(
- MonitoringLocationIdentifier = names(comp.results$membership),
- Group = comp.results$membership,
- row.names = NULL
- ) %>%
- dplyr::group_by(Group) %>%
- dplyr::mutate(n = length(MonitoringLocationIdentifier)) %>%
- dplyr::filter(n > 1) %>%
- dplyr::select(-n) %>%
- dplyr::ungroup()
-
- # remove intermediate objects
- rm(dist.mat1, adj.graph, comp.results)
-
- if (nrow(group.sites) == 0) { # #if no groups, give a TADA.NearbySiteGroup column filled with
- # "No nearby sites"
- print("TADA_FindNearbySites: No nearby sites detected. Columns for TADA.NearbySitesFlag and TADA.NearbySiteGroup added for tracking purposes.")
-
- .data <- .data %>%
- dplyr::mutate(
- TADA.NearbySites.Flag = "No nearby sites detected.",
- TADA.NearbySiteGroup = NA
- )
-
- return(.data)
+ dist.mat1 <- apply(dist.matrix, c(1, 2), function(x) {
+ if (x <= dist_buffer) {
+ x <- 1
+ } else {
+ x <- 0
}
+ })
+
+ # remove intermediate object
+ rm(dist.matrix)
+
+ # create adjacency graph
+ adj.graph <- igraph::graph_from_adjacency_matrix(dist.mat1, mode = "undirected", diag = FALSE)
+
+ # find connected sites
+ comp.results <- igraph::components(adj.graph)
+
+ # create site group dfs
+ group.sites <- data.frame(
+ MonitoringLocationIdentifier = names(comp.results$membership),
+ Group = comp.results$membership,
+ row.names = NULL
+ ) %>%
+ dplyr::group_by(Group) %>%
+ dplyr::mutate(n = length(MonitoringLocationIdentifier)) %>%
+ dplyr::filter(n > 1) %>%
+ dplyr::select(-n) %>%
+ dplyr::ungroup()
+
+ # remove intermediate objects
+ rm(dist.mat1, adj.graph, comp.results)
- # subset nearby sites
- near.sites <- unique.mls %>%
- dplyr::filter(MonitoringLocationIdentifier %in%
- group.sites$MonitoringLocationIdentifier) %>%
- dplyr::left_join(group.sites, by = dplyr::join_by(MonitoringLocationIdentifier))
+ if (nrow(group.sites) == 0) { # #if no groups, give a TADA.NearbySiteGroup column filled with
+ # "No nearby sites"
+ print("TADA_FindNearbySites: No nearby sites detected. Columns for TADA.NearbySitesFlag and TADA.NearbySiteGroup added for tracking purposes.")
+
+ .data <- .data %>%
+ dplyr::mutate(
+ TADA.NearbySites.Flag = "No nearby sites detected.",
+ TADA.NearbySiteGroup = NA
+ )
+
+ return(.data)
+ }
+
+ # subset nearby sites
+ near.sites <- unique.mls %>%
+ dplyr::filter(MonitoringLocationIdentifier %in%
+ group.sites$MonitoringLocationIdentifier) %>%
+ dplyr::left_join(group.sites, by = dplyr::join_by(MonitoringLocationIdentifier))
+
+ # break into multiple dfs
+ near.dfs <- near.sites %>%
+ dplyr::group_split(Group, .keep = FALSE)
- # break into multiple dfs
- near.dfs <- near.sites %>%
- dplyr::group_split(Group, .keep = FALSE)
-
# fetch nhdplus catchment information
nhd.catch <- near.dfs %>%
purrr::map(~ .x %>%
- fetchNHD(resolution = nhd_res)
- )
+ fetchNHD(resolution = nhd_res))
nhd.catch.all <- dplyr::bind_rows(nhd.catch)
@@ -687,25 +686,25 @@ TADA_FindNearbySites <- function(.data, dist_buffer = 100,
dplyr::distinct() %>%
dplyr::group_by(Group, NHD.nhdplusid) %>%
dplyr::mutate(n = length(TADA.MonitoringLocationIdentifier)) %>%
- dplyr::filter(n >1) %>%
+ dplyr::filter(n > 1) %>%
dplyr::select(-n)
-
+
# remove intermediate objects
rm(near.sites, nhd.catch, nhd.catch.all)
-
+
if (nrow(catch.groups) == 0) { # #if no groups, give a TADA.NearbySiteGroup column filled with
# "No nearby sites"
print("TADA_FindNearbySites: No nearby sites detected. Columns for TADA.NearbySitesFlag and TADA.NearbySiteGroup added for tracking purposes.")
-
+
.data <- .data %>%
dplyr::mutate(
TADA.NearbySites.Flag = "No nearby sites detected.",
TADA.NearbySiteGroup = NA
)
-
+
return(.data)
}
-
+
# create df of all groups and create unique id for each group
new.ids <- catch.groups %>%
# create new TADA.MonitoringLocationIdentifier
@@ -719,319 +718,322 @@ TADA_FindNearbySites <- function(.data, dist_buffer = 100,
TADA.NearbySiteGroup = dplyr::cur_group_id()
) %>%
dplyr::ungroup() %>%
- dplyr::select(TADA.MonitoringLocationIdentifier.New, TADA.MonitoringLocationIdentifier,
- TADA.NearbySiteGroup) %>%
+ dplyr::select(
+ TADA.MonitoringLocationIdentifier.New, TADA.MonitoringLocationIdentifier,
+ TADA.NearbySiteGroup
+ ) %>%
dplyr::distinct()
-
+
# remove intermediate objects
rm(catch.groups, near.dfs, unique.mls)
- # create a df of unique grouped sites, do not include any activity start dates
- grouped.no.dates <- new.ids %>%
- dplyr::full_join(.data, by = dplyr::join_by(TADA.MonitoringLocationIdentifier)) %>%
- dplyr::select(TADA.MonitoringLocationName, TADA.MonitoringLocationIdentifier.New,
- TADA.NearbySiteGroup, TADA.MonitoringLocationName, TADA.LatitudeMeasure,
- TADA.LongitudeMeasure, TADA.MonitoringLocationTypeName, OrganizationIdentifier
- ) %>%
- dplyr::distinct() %>%
- sf::st_drop_geometry()
+ # create a df of unique grouped sites, do not include any activity start dates
+ grouped.no.dates <- new.ids %>%
+ dplyr::full_join(.data, by = dplyr::join_by(TADA.MonitoringLocationIdentifier)) %>%
+ dplyr::select(
+ TADA.MonitoringLocationName, TADA.MonitoringLocationIdentifier.New,
+ TADA.NearbySiteGroup, TADA.MonitoringLocationName, TADA.LatitudeMeasure,
+ TADA.LongitudeMeasure, TADA.MonitoringLocationTypeName, OrganizationIdentifier
+ ) %>%
+ dplyr::distinct() %>%
+ sf::st_drop_geometry()
- # create list of orgs from TADA df
- all.orgs <- unique(.data$OrganizationIdentifier)
+ # create list of orgs from TADA df
+ all.orgs <- unique(.data$OrganizationIdentifier)
- # compare list of orgs from TADA df to user supplied org_hierachy to find missing orgs
- missing.orgs <- setdiff(all.orgs, org_hierarchy)
+ # compare list of orgs from TADA df to user supplied org_hierarchy to find missing orgs
+ missing.orgs <- setdiff(all.orgs, org_hierarchy)
- # create string for flagging based on meta_select
- if (meta_select == "random") {
- meta.string <- "random selection"
- }
+ # create string for flagging based on meta_select
+ if (meta_select == "random") {
+ meta.string <- "random selection"
+ }
- if (meta_select == "oldest") {
- meta.string <- "oldest sampling date"
- }
+ if (meta_select == "oldest") {
+ meta.string <- "oldest sampling date"
+ }
- if (meta_select == "newest") {
- meta.string <- "most reccent sampling date"
+ if (meta_select == "newest") {
+ meta.string <- "most reccent sampling date"
+ }
+
+ if (meta_select == "count") {
+ meta.string <- "greatest number of results in TADA data frame"
+ }
+
+ # use org hierarchy for first round of metadata selection
+ if (isTRUE(org_hierarchy == "none")) {
+ # create string for flagging
+ org.string <- "Metadata were selected by "
+
+
+ # print message
+ print("TADA_FindNearbySites: No org_hierarchy supplied by user. Organization will not be taken into account during metadata selection.")
+
+ # create consistent org rank to facilitate meta data selection (all orgs ranked equally)
+ org.ranks <- as.data.frame(all.orgs) %>%
+ dplyr::mutate(OrgRank = 99) %>%
+ dplyr::rename(OrganizationIdentifier = all.orgs)
+ }
+
+ # if org hierarchy is supplied by user
+ if (org_hierarchy[1] != "none") {
+ # create string for flagging
+ org.string <- "Metadata were selected by filtering based on the user supplied hierarchy, then by "
+
+ if (!is.vector(org_hierarchy)) {
+ stop("TADA_FindNearbySites: Organization hierarchy must be supplied as a vector.")
}
- if (meta_select == "count") {
- meta.string <- "greatest number of results in TADA data frame"
+ if (length(org_hierarchy) == 0) {
+ stop("TADA_FindNearbySites: No organization identifiers were supplied.")
}
- # use org hierarchy for first round of metadata selection
- if (isTRUE(org_hierarchy == "none")) {
- # create string for flagging
- org.string <- "Metadata were selected by "
+ if (length(missing.orgs) > 0) {
+ print(paste0(
+ "TADA_FindNearbySites: ", length(missing.orgs),
+ " organization identifiers are missing from org_hierarchy (",
+ stringi::stri_replace_last(paste(missing.orgs, collapse = ", "),
+ fixed = ", ", " and "
+ ), ").",
+ " Function will continue to run using partial org_hierarchy."
+ ))
+ # create df for organization ranks from user-supplied hierarchy
+ org.ranks <- as.data.frame(org_hierarchy) %>%
+ dplyr::mutate(OrgRank = dplyr::row_number()) %>%
+ dplyr::rename(OrganizationIdentifier = org_hierarchy)
- # print message
- print("TADA_FindNearbySites: No org_hierarchy supplied by user. Organization will not be taken into account during metadata selection.")
+ # create df for all organizations missing from user-supplied hierarchy
+ # all missing orgs will share the same rank and be ranked below any orgs supplied by user
+ missing.ranks <- as.data.frame(missing.orgs) %>%
+ dplyr::mutate(OrgRank = (length(org_hierarchy) + 1)) %>%
+ dplyr::rename(OrganizationIdentifier = missing.orgs)
- # create consistent org rank to facilitate meta data selection (all orgs ranked equally)
- org.ranks <- as.data.frame(all.orgs) %>%
- dplyr::mutate(OrgRank = 99) %>%
- dplyr::rename(OrganizationIdentifier = all.orgs)
+ # add missing orgs to org rank df
+ org.ranks <- org.ranks %>%
+ dplyr::bind_rows(missing.ranks)
}
- # if org hierarchy is supplied by user
- if (org_hierarchy[1] != "none") {
- # create string for flagging
- org.string <- "Metadata were selected by filtering based on the user supplied hierarchy, then by "
-
- if (!is.vector(org_hierarchy)) {
- stop("TADA_FindNearbySites: Organization hierarchy must be supplied as a vector.")
- }
-
- if (length(org_hierarchy) == 0) {
- stop("TADA_FindNearbySites: No organization identifiers were supplied.")
- }
-
- if (length(missing.orgs) > 0) {
- print(paste0(
- "TADA_FindNearbySites: ", length(missing.orgs),
- " organization identifiers are missing from org_hierarchy (",
- stringi::stri_replace_last(paste(missing.orgs, collapse = ", "),
- fixed = ", ", " and "
- ), ").",
- " Function will continue to run using partial org_hierarchy."
- ))
-
- # create df for organization ranks from user-supplied hierarchy
- org.ranks <- as.data.frame(org_hierarchy) %>%
- dplyr::mutate(OrgRank = dplyr::row_number()) %>%
- dplyr::rename(OrganizationIdentifier = org_hierarchy)
-
- # create df for all organizations missing from user-supplied hierarchy
- # all missing orgs will share the same rank and be ranked below any orgs supplied by user
- missing.ranks <- as.data.frame(missing.orgs) %>%
- dplyr::mutate(OrgRank = (length(org_hierarchy) + 1)) %>%
- dplyr::rename(OrganizationIdentifier = missing.orgs)
-
- # add missing orgs to org rank df
- org.ranks <- org.ranks %>%
- dplyr::bind_rows(missing.ranks)
- }
-
- if (length(missing.orgs) == 0) {
- # create df for organization ranks from user-supplied hierarchy
- org.ranks <- as.data.frame(org_hierarchy) %>%
- dplyr::mutate(OrgRank = dplyr::row_number()) %>%
- dplyr::rename(OrganizationIdentifier = org_hierarchy)
- }
-
-
- rm(all.orgs, missing.orgs)
+ if (length(missing.orgs) == 0) {
+ # create df for organization ranks from user-supplied hierarchy
+ org.ranks <- as.data.frame(org_hierarchy) %>%
+ dplyr::mutate(OrgRank = dplyr::row_number()) %>%
+ dplyr::rename(OrganizationIdentifier = org_hierarchy)
}
- # add org ranks to df of all TADA.MonitoringLocationIdentifier.New
- org.ranks.added <- grouped.no.dates %>%
- dplyr::left_join(org.ranks, by = dplyr::join_by(OrganizationIdentifier))
- rm(org.ranks)
+ rm(all.orgs, missing.orgs)
+ }
- # filter to retain metadata for TADA.MonitoringLocation.New where there is only one set of
- # metadata from the highest ranked org
- org.meta.filter <- org.ranks.added %>%
- dplyr::group_by(TADA.NearbySiteGroup, OrgRank) %>%
- dplyr::mutate(CountSites = length(OrgRank)) %>%
- dplyr::filter(CountSites == 1) %>%
+ # add org ranks to df of all TADA.MonitoringLocationIdentifier.New
+ org.ranks.added <- grouped.no.dates %>%
+ dplyr::left_join(org.ranks, by = dplyr::join_by(OrganizationIdentifier))
+
+ rm(org.ranks)
+
+ # filter to retain metadata for TADA.MonitoringLocation.New where there is only one set of
+ # metadata from the highest ranked org
+ org.meta.filter <- org.ranks.added %>%
+ dplyr::group_by(TADA.NearbySiteGroup, OrgRank) %>%
+ dplyr::mutate(CountSites = length(OrgRank)) %>%
+ dplyr::filter(CountSites == 1) %>%
+ dplyr::ungroup() %>%
+ dplyr::select(-OrgRank, -CountSites) %>%
+ dplyr::mutate(TADA.NearbySites.Flag = paste0(
+ "This monitoring location was grouped with other nearby site(s). ",
+ org.string, meta.string, "."
+ ))
+
+ # select and assign metadata randomly for grouped sites when meta_select equals "random"
+
+ if (meta_select == "random") {
+ # select random metadata where necessary (no org rank or more than one set of metadata for one
+ # TADA.MonitoringLocationIdentifier.New)
+ random.meta <- org.ranks.added %>%
dplyr::ungroup() %>%
- dplyr::select(-OrgRank, -CountSites) %>%
- dplyr::mutate(TADA.NearbySites.Flag = paste0(
- "This monitoring location was grouped with other nearby site(s). ",
- org.string, meta.string, "."
- ))
+ dplyr::filter(!TADA.NearbySiteGroup %in%
+ org.meta.filter$TADA.NearbySiteGroup) %>%
+ dplyr::group_by(TADA.NearbySiteGroup) %>%
+ dplyr::slice_min(OrgRank) %>%
+ dplyr::select(
+ TADA.MonitoringLocationIdentifier.New,
+ TADA.MonitoringLocationName,
+ TADA.LatitudeMeasure, TADA.LongitudeMeasure,
+ TADA.MonitoringLocationTypeName,
+ TADA.NearbySiteGroup
+ ) %>%
+ dplyr::distinct() %>%
+ dplyr::slice_sample(n = 1) %>%
+ dplyr::ungroup()
- # select and assign metadata randomly for grouped sites when meta_select equals "random"
-
- if (meta_select == "random") {
- # select random metadata where necessary (no org rank or more than one set of metdata for one
- # TADA.MonitoringLocationIdentifier.New)
- random.meta <- org.ranks.added %>%
- dplyr::ungroup() %>%
- dplyr::filter(!TADA.NearbySiteGroup %in%
- org.meta.filter$TADA.NearbySiteGroup) %>%
- dplyr::group_by(TADA.NearbySiteGroup) %>%
- dplyr::slice_min(OrgRank) %>%
- dplyr::select(
- TADA.MonitoringLocationIdentifier.New,
- TADA.MonitoringLocationName,
- TADA.LatitudeMeasure, TADA.LongitudeMeasure,
- TADA.MonitoringLocationTypeName,
- TADA.NearbySiteGroup
- ) %>%
- dplyr::distinct() %>%
- dplyr::slice_sample(n = 1) %>%
- dplyr::ungroup()
-
-
- # join the metadata filtering results to create a df with all metadat to apply to TADA df by
- # TADA.MonitoringLocationIdentifier.New
- select.meta <- random.meta %>%
- dplyr::full_join(org.meta.filter, by = names(random.meta)) %>%
- dplyr::select(-OrganizationIdentifier) %>%
- dplyr::rename(
- TADA.MonitoringLocationName.New = TADA.MonitoringLocationName,
- TADA.LatitudeMeasure.New = TADA.LatitudeMeasure,
- TADA.LongitudeMeasure.New = TADA.LongitudeMeasure,
- TADA.MonitoringLocationTypeName.New = TADA.MonitoringLocationTypeName
- ) %>%
- dplyr::mutate(TADA.NearbySites.Flag = "This monitoring location was grouped with other nearby site(s). Metadata were selected randomly.")
-
- # remove intermediate objects
- rm(random.meta, org.ranks.added)
- }
- if (meta_select == "oldest" | meta_select == "newest") {
- # prep site groups for metadata selection by date
- date.meta <- grouped.sites %>%
- dplyr::left_join(org.ranks.added, by = dplyr::join_by(
- TADA.MonitoringLocationIdentifier.New,
- TADA.NearbySiteGroup,
- TADA.MonitoringLocationName,
- TADA.LatitudeMeasure,
- TADA.LongitudeMeasure,
- TADA.MonitoringLocationTypeName,
- OrganizationIdentifier
- )) %>%
- dplyr::filter(!TADA.MonitoringLocationIdentifier.New %in%
- org.meta.filter$TADA.MonitoringLocationIdentifier.New) %>%
- dplyr::mutate(OrgRank = ifelse(is.na(OrgRank), rank.default, OrgRank)) %>%
- dplyr::group_by(TADA.MonitoringLocationIdentifier.New)
-
- if (meta_select == "oldest") {
- # select oldest metadata for group
- date.meta <- date.meta %>%
- dplyr::slice_min(ActivityStartDate)
-
- # specify oldest for flagging string
- date.choice <- "oldest"
- }
-
- if (meta_select == "newest") {
- # select newest metadata for group
- date.meta <- date.meta %>%
- dplyr::slice_max(ActivityStartDate)
-
- # specify newest for flagging string
- date.choice <- "newest"
- }
-
- # select metadata by date
- select.meta <- date.meta %>%
- dplyr::full_join(org.meta.filter, by = dplyr::join_by(
- TADA.MonitoringLocationIdentifier.New,
- TADA.NearbySiteGroup,
- TADA.MonitoringLocationName,
- TADA.LatitudeMeasure,
- TADA.LongitudeMeasure,
- TADA.MonitoringLocationTypeName,
- OrganizationIdentifier
- )) %>%
- dplyr::select(-OrganizationIdentifier, -OrgRank, -ActivityStartDate) %>%
- dplyr::rename(
- TADA.MonitoringLocationName.New = TADA.MonitoringLocationName,
- TADA.LatitudeMeasure.New = TADA.LatitudeMeasure,
- TADA.LongitudeMeasure.New = TADA.LongitudeMeasure,
- TADA.MonitoringLocationTypeName.New = TADA.MonitoringLocationTypeName
- ) %>%
- dplyr::group_by(TADA.NearbySiteGroup) %>%
- dplyr::slice_sample(n = 1) %>%
- dplyr::mutate(TADA.NearbySites.Flag = paste0(
- "This monitoring location was grouped with other",
- " nearby site(s). Metadata were selected from ",
- "the ", date.choice, " result available."
- ))
+ # join the metadata filtering results to create a df with all metadata to apply to TADA df by
+ # TADA.MonitoringLocationIdentifier.New
+ select.meta <- random.meta %>%
+ dplyr::full_join(org.meta.filter, by = names(random.meta)) %>%
+ dplyr::select(-OrganizationIdentifier) %>%
+ dplyr::rename(
+ TADA.MonitoringLocationName.New = TADA.MonitoringLocationName,
+ TADA.LatitudeMeasure.New = TADA.LatitudeMeasure,
+ TADA.LongitudeMeasure.New = TADA.LongitudeMeasure,
+ TADA.MonitoringLocationTypeName.New = TADA.MonitoringLocationTypeName
+ ) %>%
+ dplyr::mutate(TADA.NearbySites.Flag = "This monitoring location was grouped with other nearby site(s). Metadata were selected randomly.")
+
+ # remove intermediate objects
+ rm(random.meta, org.ranks.added)
+ }
+
+ if (meta_select == "oldest" | meta_select == "newest") {
+ # prep site groups for metadata selection by date
+ date.meta <- grouped.sites %>%
+ dplyr::left_join(org.ranks.added, by = dplyr::join_by(
+ TADA.MonitoringLocationIdentifier.New,
+ TADA.NearbySiteGroup,
+ TADA.MonitoringLocationName,
+ TADA.LatitudeMeasure,
+ TADA.LongitudeMeasure,
+ TADA.MonitoringLocationTypeName,
+ OrganizationIdentifier
+ )) %>%
+ dplyr::filter(!TADA.MonitoringLocationIdentifier.New %in%
+ org.meta.filter$TADA.MonitoringLocationIdentifier.New) %>%
+ dplyr::mutate(OrgRank = ifelse(is.na(OrgRank), rank.default, OrgRank)) %>%
+ dplyr::group_by(TADA.MonitoringLocationIdentifier.New)
- rm(date.meta)
+ if (meta_select == "oldest") {
+ # select oldest metadata for group
+ date.meta <- date.meta %>%
+ dplyr::slice_min(ActivityStartDate)
+
+ # specify oldest for flagging string
+ date.choice <- "oldest"
}
- if (meta_select == "count") {
- # select metadata by finding site with greatest number of results in TADA df
- select.meta <- org.ranks.added %>%
- dplyr::left_join(.data, by = dplyr::join_by(
- TADA.MonitoringLocationName, TADA.LatitudeMeasure,
- TADA.LongitudeMeasure, TADA.MonitoringLocationTypeName
- )) %>%
- dplyr::group_by(TADA.MonitoringLocationIdentifier) %>%
- dplyr::mutate(NCount = length(TADA.ResultMeasureValue)) %>%
- dplyr::ungroup() %>%
- dplyr::select(-TADA.MonitoringLocationIdentifier) %>%
- dplyr::distinct() %>%
- dplyr::group_by(TADA.NearbySiteGroup) %>%
- dplyr::slice_max(NCount) %>%
- dplyr::slice_sample(n = 1) %>%
- dplyr::select(
- TADA.MonitoringLocationIdentifier.New, TADA.NearbySiteGroup,
- TADA.MonitoringLocationName, TADA.LatitudeMeasure, TADA.LongitudeMeasure,
- TADA.MonitoringLocationTypeName
- ) %>%
- dplyr::rename(
- TADA.MonitoringLocationName.New = TADA.MonitoringLocationName,
- TADA.LatitudeMeasure.New = TADA.LatitudeMeasure,
- TADA.LongitudeMeasure.New = TADA.LongitudeMeasure,
- TADA.MonitoringLocationTypeName.New = TADA.MonitoringLocationTypeName
- ) %>%
- dplyr::mutate(TADA.NearbySites.Flag = "This monitoring location was grouped with other nearby site(s). Metadata were selected from MonitoringLocation with the most results available across all characteristics.")
+ if (meta_select == "newest") {
+ # select newest metadata for group
+ date.meta <- date.meta %>%
+ dplyr::slice_max(ActivityStartDate)
+
+ # specify newest for flagging string
+ date.choice <- "newest"
}
- # remove intermediate objects
- rm(grouped.no.dates, org.meta.filter, org.string, meta.string)
+ # select metadata by date
+ select.meta <- date.meta %>%
+ dplyr::full_join(org.meta.filter, by = dplyr::join_by(
+ TADA.MonitoringLocationIdentifier.New,
+ TADA.NearbySiteGroup,
+ TADA.MonitoringLocationName,
+ TADA.LatitudeMeasure,
+ TADA.LongitudeMeasure,
+ TADA.MonitoringLocationTypeName,
+ OrganizationIdentifier
+ )) %>%
+ dplyr::select(-OrganizationIdentifier, -OrgRank, -ActivityStartDate) %>%
+ dplyr::rename(
+ TADA.MonitoringLocationName.New = TADA.MonitoringLocationName,
+ TADA.LatitudeMeasure.New = TADA.LatitudeMeasure,
+ TADA.LongitudeMeasure.New = TADA.LongitudeMeasure,
+ TADA.MonitoringLocationTypeName.New = TADA.MonitoringLocationTypeName
+ ) %>%
+ dplyr::group_by(TADA.NearbySiteGroup) %>%
+ dplyr::slice_sample(n = 1) %>%
+ dplyr::mutate(TADA.NearbySites.Flag = paste0(
+ "This monitoring location was grouped with other",
+ " nearby site(s). Metadata were selected from ",
+ "the ", date.choice, " result available."
+ ))
- # remove site group from crosswalk
- ml.crosswalk <- new.ids %>%
- sf::st_drop_geometry() %>%
- dplyr::select(-TADA.NearbySiteGroup) %>%
- dplyr::distinct()
+ rm(date.meta)
+ }
- # join selected metadata to TADA df
- .data <- .data %>%
- dplyr::left_join(ml.crosswalk, by = dplyr::join_by(TADA.MonitoringLocationIdentifier)) %>%
- dplyr::left_join(select.meta, by = dplyr::join_by(TADA.MonitoringLocationIdentifier.New)) %>%
+ if (meta_select == "count") {
+ # select metadata by finding site with greatest number of results in TADA df
+ select.meta <- org.ranks.added %>%
+ dplyr::left_join(.data, by = dplyr::join_by(
+ TADA.MonitoringLocationName, TADA.LatitudeMeasure,
+ TADA.LongitudeMeasure, TADA.MonitoringLocationTypeName
+ )) %>%
+ dplyr::group_by(TADA.MonitoringLocationIdentifier) %>%
+ dplyr::mutate(NCount = length(TADA.ResultMeasureValue)) %>%
dplyr::ungroup() %>%
- dplyr::mutate(
- TADA.MonitoringLocationName = ifelse(!is.na(TADA.MonitoringLocationName.New),
- TADA.MonitoringLocationName.New,
- TADA.MonitoringLocationName
- ),
- TADA.LatitudeMeasure = ifelse(!is.na(TADA.LatitudeMeasure.New),
- TADA.LatitudeMeasure.New,
- TADA.LatitudeMeasure
- ),
- TADA.LongitudeMeasure = ifelse(!is.na(TADA.LongitudeMeasure.New),
- TADA.LongitudeMeasure.New,
- TADA.LongitudeMeasure
- ),
- TADA.MonitoringLocationTypeName = ifelse(!is.na(TADA.MonitoringLocationTypeName.New),
- TADA.MonitoringLocationTypeName.New,
- TADA.MonitoringLocationTypeName
- ),
- TADA.MonitoringLocationIdentifier = ifelse(!is.na(TADA.MonitoringLocationIdentifier.New),
- TADA.MonitoringLocationIdentifier.New,
- TADA.MonitoringLocationIdentifier
- )
- ) %>%
+ dplyr::select(-TADA.MonitoringLocationIdentifier) %>%
+ dplyr::distinct() %>%
+ dplyr::group_by(TADA.NearbySiteGroup) %>%
+ dplyr::slice_max(NCount) %>%
+ dplyr::slice_sample(n = 1) %>%
dplyr::select(
- -TADA.MonitoringLocationIdentifier.New, -TADA.MonitoringLocationName.New,
- -TADA.LatitudeMeasure.New, -TADA.LongitudeMeasure.New,
- -TADA.MonitoringLocationTypeName.New
+ TADA.MonitoringLocationIdentifier.New, TADA.NearbySiteGroup,
+ TADA.MonitoringLocationName, TADA.LatitudeMeasure, TADA.LongitudeMeasure,
+ TADA.MonitoringLocationTypeName
) %>%
- TADA_OrderCols()
+ dplyr::rename(
+ TADA.MonitoringLocationName.New = TADA.MonitoringLocationName,
+ TADA.LatitudeMeasure.New = TADA.LatitudeMeasure,
+ TADA.LongitudeMeasure.New = TADA.LongitudeMeasure,
+ TADA.MonitoringLocationTypeName.New = TADA.MonitoringLocationTypeName
+ ) %>%
+ dplyr::mutate(TADA.NearbySites.Flag = "This monitoring location was grouped with other nearby site(s). Metadata were selected from MonitoringLocation with the most results available across all characteristics.")
+ }
- # remove intermediate objects
- rm(select.meta, ml.crosswalk, group.sites, new.ids)
+ # remove intermediate objects
+ rm(grouped.no.dates, org.meta.filter, org.string, meta.string)
- # add flag for any ungrouped sites and order columns correctly
- .data <- TADA_OrderCols(.data) %>%
- dplyr::mutate(TADA.NearbySites.Flag = ifelse(is.na(TADA.NearbySiteGroup),
- "No nearby sites detected using input buffer distance.",
- TADA.NearbySites.Flag
- ))
+ # remove site group from crosswalk
+ ml.crosswalk <- new.ids %>%
+ sf::st_drop_geometry() %>%
+ dplyr::select(-TADA.NearbySiteGroup) %>%
+ dplyr::distinct()
- # return TADA df with added columns for tracking
- return(.data)
- }
+ # join selected metadata to TADA df
+ .data <- .data %>%
+ dplyr::left_join(ml.crosswalk, by = dplyr::join_by(TADA.MonitoringLocationIdentifier)) %>%
+ dplyr::left_join(select.meta, by = dplyr::join_by(TADA.MonitoringLocationIdentifier.New)) %>%
+ dplyr::ungroup() %>%
+ dplyr::mutate(
+ TADA.MonitoringLocationName = ifelse(!is.na(TADA.MonitoringLocationName.New),
+ TADA.MonitoringLocationName.New,
+ TADA.MonitoringLocationName
+ ),
+ TADA.LatitudeMeasure = ifelse(!is.na(TADA.LatitudeMeasure.New),
+ TADA.LatitudeMeasure.New,
+ TADA.LatitudeMeasure
+ ),
+ TADA.LongitudeMeasure = ifelse(!is.na(TADA.LongitudeMeasure.New),
+ TADA.LongitudeMeasure.New,
+ TADA.LongitudeMeasure
+ ),
+ TADA.MonitoringLocationTypeName = ifelse(!is.na(TADA.MonitoringLocationTypeName.New),
+ TADA.MonitoringLocationTypeName.New,
+ TADA.MonitoringLocationTypeName
+ ),
+ TADA.MonitoringLocationIdentifier = ifelse(!is.na(TADA.MonitoringLocationIdentifier.New),
+ TADA.MonitoringLocationIdentifier.New,
+ TADA.MonitoringLocationIdentifier
+ )
+ ) %>%
+ dplyr::select(
+ -TADA.MonitoringLocationIdentifier.New, -TADA.MonitoringLocationName.New,
+ -TADA.LatitudeMeasure.New, -TADA.LongitudeMeasure.New,
+ -TADA.MonitoringLocationTypeName.New
+ ) %>%
+ TADA_OrderCols()
+
+ # remove intermediate objects
+ rm(select.meta, ml.crosswalk, group.sites, new.ids)
+
+ # add flag for any ungrouped sites and order columns correctly
+ .data <- TADA_OrderCols(.data) %>%
+ dplyr::mutate(TADA.NearbySites.Flag = ifelse(is.na(TADA.NearbySiteGroup),
+ "No nearby sites detected using input buffer distance.",
+ TADA.NearbySites.Flag
+ ))
+
+ # return TADA df with added columns for tracking
+ return(.data)
+}
diff --git a/R/autoFilter.R b/R/autoFilter.R
index 4c86e6e8a..5f011f6bb 100644
--- a/R/autoFilter.R
+++ b/R/autoFilter.R
@@ -220,10 +220,11 @@ TADA_FieldValuesTable <- function(.data, field = "null", characteristicName = "n
#' that all results not flagged for use in the analysis workflow will be removed
#' and the TADA.UseForAnalysis.Flag column will not be added.
#'
-#' It uses ActivityMediaSubdivisionName, AquiferName,
+#' It uses MonitoringLocationTypeName, ActivityMediaName, ActivityMediaSubdivisionName,
+#' AquiferName,
#' LocalAqfrName, ConstructionDateText, WellDepthMeasure.MeasureValue,
#' WellDepthMeasure.MeasureUnitCode, WellHoleDepthMeasure.MeasureValue, and
-#' WellHoleDepthMeasure.MeasureUnitCode to identify groundwater samples. Users
+#' WellHoleDepthMeasure.MeasureUnitCode to identify samples. Users
#' can select whether sediment, groundwater and/or surface water should be included.
#' An additional column, TADA.UseForAnalysis.Flag, specifies whether each row should
#' be included in the analysis workflow and why. Setting clean = TRUE, means
diff --git a/R/dev/occationalMaintenance.R b/R/dev/occationalMaintenance.R
index 70b48b55e..2079f170d 100644
--- a/R/dev/occationalMaintenance.R
+++ b/R/dev/occationalMaintenance.R
@@ -81,8 +81,8 @@ df <- data.frame(urls, response_code)
# filter for any response codes that are not successful or redirect responses
df_false <- df %>%
dplyr::filter(!grepl("200", response_code) &
- !grepl("301", response_code) &
- !grepl("302", response_code))
+ !grepl("301", response_code) &
+ !grepl("302", response_code))
# Review the output of df_false.
# More information about http response codes can be found here:
@@ -105,10 +105,12 @@ unit.ref <- utils::read.csv(system.file("extdata", "WQXcharValRef.csv", package
find.dups <- unit.ref %>%
dplyr::filter(Type == "CharacteristicUnit") %>%
dplyr::group_by(Characteristic, Source, Value.Unit) %>%
- dplyr::mutate(Min_n = length(unique(Minimum)),
- Max_n = length(unique(Maximum))) %>%
+ dplyr::mutate(
+ Min_n = length(unique(Minimum)),
+ Max_n = length(unique(Maximum))
+ ) %>%
dplyr::filter(Min_n > 1 |
- Max_n > 1)
+ Max_n > 1)
# create download path
download.path <- file.path(Sys.getenv("USERPROFILE"), "Downloads", "WQXcharValRef_multiples.csv")
@@ -145,37 +147,35 @@ readr::write_csv(find.dups, download.path)
# DRAFT function for overnight testing on lots of example data (incomplete)
-TADA_OvernightTesting <- function(){
-
+TADA_OvernightTesting <- function() {
testing_log <- file("testing_log.txt") # File name of output log
sink(testing_log, append = TRUE, type = "output") # Writing console output to log file
sink(testing_log, append = TRUE, type = "message")
- #cat(readChar(rstudioapi::getSourceEditorContext()$path, # Writing currently opened R script to file
+ # cat(readChar(rstudioapi::getSourceEditorContext()$path, # Writing currently opened R script to file
# file.info(rstudioapi::getSourceEditorContext()$path)$size))
- num_iterations=2
+ num_iterations <- 2
master_missing_codes_df <- data.frame(MeasureQualifierCode = NA, TADA.MeasureQualifierCode.Flag = NA)
for (i in 1:num_iterations) {
-
testing <- TADA_RandomTestingData()
testing2 <- TADA_FlagMeasureQualifierCode(testing)
- #expect_true(all(testing2$TADA.MeasureQualifierCode.Flag != "Not Reviewed"))
+ # expect_true(all(testing2$TADA.MeasureQualifierCode.Flag != "Not Reviewed"))
- #print(unique(testing2$TADA_FlagMeasureQualifierCode))
- #print(unique(testing2$MeasureQualifierCode))
+ # print(unique(testing2$TADA_FlagMeasureQualifierCode))
+ # print(unique(testing2$MeasureQualifierCode))
# load in ResultMeasureQualifier Flag Table
qc.ref <- TADA_GetMeasureQualifierCodeRef() %>%
dplyr::rename(MeasureQualifierCode = Code) %>%
dplyr::select(MeasureQualifierCode, TADA.MeasureQualifierCode.Flag)
- codes = unique(testing2$MeasureQualifierCode)
- missing_codes = codes[!codes %in% qc.ref$MeasureQualifierCode]
+ codes <- unique(testing2$MeasureQualifierCode)
+ missing_codes <- codes[!codes %in% qc.ref$MeasureQualifierCode]
missing_codes_df <- data.frame(MeasureQualifierCode = missing_codes, TADA.MeasureQualifierCode.Flag = "Not Reviewed")
@@ -184,19 +184,17 @@ TADA_OvernightTesting <- function(){
master_missing_codes_df <- dplyr::full_join(missing_codes_df, master_missing_codes_df, by = c("MeasureQualifierCode", "TADA.MeasureQualifierCode.Flag"), copy = TRUE)
View(master_missing_codes_df)
+ }
- }
-
- master_missing_codes_distinct = master_missing_codes_df %>% dplyr::distinct()
+ master_missing_codes_distinct <- master_missing_codes_df %>% dplyr::distinct()
View(master_missing_codes_distinct)
- master_missing_codes_freq = as.data.frame(table(master_missing_codes_df))
+ master_missing_codes_freq <- as.data.frame(table(master_missing_codes_df))
View(master_missing_codes_freq)
closeAllConnections() # Close connection to log file
return(testing_log)
-
- }
+}
diff --git a/R/dev/requiredMaintenance.R b/R/dev/requiredMaintenance.R
index ebb63ee6b..eb7f50d07 100644
--- a/R/dev/requiredMaintenance.R
+++ b/R/dev/requiredMaintenance.R
@@ -69,7 +69,7 @@ TADA_UpdateExampleData <- function() {
y <- TADA_FlagMethod(y, clean = TRUE)
y <- TADA_FlagAboveThreshold(y, clean = TRUE)
y <- TADA_FlagBelowThreshold(y, clean = TRUE)
- # y <- TADA_FindPotentialDuplicatesMultipleOrgs(y, dist_buffer = 100)
+ y <- TADA_FindPotentialDuplicatesMultipleOrgs(y, dist_buffer = 100)
y <- TADA_FindPotentialDuplicatesSingleOrg(y)
y <- dplyr::filter(y, !(MeasureQualifierCode %in% c("D", "H", "ICA", "*")))
y <- TADA_SimpleCensoredMethods(y,
@@ -175,13 +175,13 @@ TADA_UpdateExampleData <- function() {
)
# Remove multiple org duplicates
# OPTIONAL
- # Data_WV <- TADA_FindPotentialDuplicatesMultipleOrgs(
- # Data_WV
- # )
- # Data_WV <- dplyr::filter(
- # Data_WV,
- # TADA.ResultSelectedMultipleOrgs == "Y"
- # )
+ Data_WV <- TADA_FindPotentialDuplicatesMultipleOrgs(
+ Data_WV
+ )
+ Data_WV <- dplyr::filter(
+ Data_WV,
+ TADA.ResultSelectedMultipleOrgs == "Y"
+ )
# Filter out remaining irrelevant data, NA's and empty cols
# REQUIRED
unique(Data_WV$TADA.ResultMeasureValueDataTypes.Flag)
@@ -249,4 +249,4 @@ devtools::test()
# devtools::check()
# more robust test for releases (includes broken link check)
-devtools::check(manual = TRUE, remote = TRUE, incoming = TRUE)
+devtools::check(manual = FALSE, remote = TRUE, incoming = TRUE)
diff --git a/data/Data_6Tribes_5y.rda b/data/Data_6Tribes_5y.rda
index 5b7a6c20d..cdbc4cb48 100644
Binary files a/data/Data_6Tribes_5y.rda and b/data/Data_6Tribes_5y.rda differ
diff --git a/data/Data_6Tribes_5y_Harmonized.rda b/data/Data_6Tribes_5y_Harmonized.rda
index eb1aae4c4..598b3da50 100644
Binary files a/data/Data_6Tribes_5y_Harmonized.rda and b/data/Data_6Tribes_5y_Harmonized.rda differ
diff --git a/data/Data_HUC8_02070004_Mod1Output.rda b/data/Data_HUC8_02070004_Mod1Output.rda
index 76888713a..ac5dba235 100644
Binary files a/data/Data_HUC8_02070004_Mod1Output.rda and b/data/Data_HUC8_02070004_Mod1Output.rda differ
diff --git a/data/Data_NCTCShepherdstown_HUC12.rda b/data/Data_NCTCShepherdstown_HUC12.rda
index e1b40e183..e0f951d2c 100644
Binary files a/data/Data_NCTCShepherdstown_HUC12.rda and b/data/Data_NCTCShepherdstown_HUC12.rda differ
diff --git a/data/Data_Nutrients_UT.rda b/data/Data_Nutrients_UT.rda
index 95f12575f..eb8da1a9c 100644
Binary files a/data/Data_Nutrients_UT.rda and b/data/Data_Nutrients_UT.rda differ
diff --git a/data/Data_R5_TADAPackageDemo.rda b/data/Data_R5_TADAPackageDemo.rda
index c16aba5bc..25a5a0976 100644
Binary files a/data/Data_R5_TADAPackageDemo.rda and b/data/Data_R5_TADAPackageDemo.rda differ
diff --git a/data/cybertown.rda b/data/cybertown.rda
new file mode 100644
index 000000000..629eefaca
Binary files /dev/null and b/data/cybertown.rda differ
diff --git a/inst/WORDLIST b/inst/WORDLIST
index b048e999d..6adc0c712 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -30,8 +30,12 @@ Autofilter
BiasValue
BigDataRetrieval
BiologicalIntentName
+Blodgett
CCO
+CFU
CNENVSER
+COLI
+COMIDs
CRS
CSTtoATTAINSParamCrosswalk
CWA
@@ -47,6 +51,7 @@ CharacteristicNames
CharacteristicsForDepthProfile
Chilton
ComparableDataIdentifier
+ComparableDataIdentifier's
ComparableDataIdentifiers
ConsolidatedDepth
ConstructionDateText
@@ -62,7 +67,7 @@ CreatePairRef
CreateParamRef
CreateUnitRef
CreateUseParamRef
-DOI
+Cybertown
DV
DataFrame
DataQuality
@@ -77,6 +82,7 @@ DetectionQuantitationLimitType
DetectionQuantitationLimitTypeName
DistanceAway
DomainValues
+Dumelle
EASP
EASP's
ECHO's
@@ -120,6 +126,7 @@ HarmonizeSynonyms
HiRes
HorizontalCoordinateReferenceSystemDatumName
Hydrography
+Hydrologic
HydrologicCondition
HydrologicEvent
IDCensoredData
@@ -149,6 +156,7 @@ MeasureUnitCode
MeasureValue
MeasureValueDataTypes
MeetsStandard
+Memoranda
MethodDescriptionText
MethodIdentifier
MethodIdentifierContext
@@ -173,12 +181,16 @@ NCTCShepherdstown
ND's
NHD
NHDPlus
+NHDPlusTools
+NLCD
+NLDI
NWIS
NearbySiteGroup
NearbySites
NoResultValue
NoTargetUnit
NonStandardized
+Nondetects
Nonpoint
NutrientSummation
NutrientSummationEquation
@@ -216,6 +228,7 @@ QAPPAprrovedIndicator
QAPPDocAvailable
QAQC
QAQC'd
+QAQC'ing
QAQCCharacteristicValidation
QC'ing
QCing
@@ -228,6 +241,7 @@ RPD's
RPDs
RProj
RStudio
+RWQC
Rancherias
ReplicateSampleID
Repo
@@ -258,8 +272,11 @@ ResultWeightBasisText
RetainRequired
Rmd
Rtools
+RunKeyFlagFunctions
SFNOES
+SSN
STORET
+STV
SampleCollectionEquipmentName
SampleCollectionMethod
SampleFraction
@@ -272,6 +289,8 @@ Speciation
SpeciationAssumptions
SpeciationUnitConversion
StateCode
+StreamCat
+StreamCatTools
StringA
StringB
SubstituteDeprecatedChars
@@ -289,6 +308,7 @@ TADAShiny
TMDL
TargetUnit
Tesuque
+Thawley
TribalOptions
UG
USEPA
@@ -341,6 +361,7 @@ asNO
assessmentunitidentifier
autoclean
autocleaned
+bBox
bbox
bnd
bottomvalue
@@ -351,6 +372,7 @@ characteristicName
characteristicType
charactersitic
cleanNA
+comid
countrycode
countycode
cristinamullin
@@ -369,6 +391,7 @@ devtools
df
dplyr
du
+dv
eXchange
ecc
ecoregions
@@ -380,6 +403,7 @@ fabec
fetchATTAINS
fetchNHD
flaggedonly
+flowline
flowlines
geeksforgeeks
geopub
@@ -397,12 +421,16 @@ helpdesk
https
huc
hucs
+hydrographic
+hydrography
hydrologic
+hydroloom
importWQP
interoperable
io
lessthan
listNWIS
+mL
magrittr
manys
mapviewer
@@ -411,6 +439,7 @@ mimeType
mlt
myfileRef
nd
+nhdplus
nhdplusTools
nondetections
nonstandardized
@@ -428,6 +457,7 @@ pch
pchIcons
pcodes
ph
+pkgdown
plotly
pre
rATTAINS
@@ -446,6 +476,8 @@ siteid
sitetype
speciation
speciations
+spmodel
+spsurvey
startDate
startDateHi
startDateLo
@@ -453,9 +485,11 @@ statecode
surfacevalue
sysdata
tada
+terra
testthat
th
tigris
+tmap
un
uncomment
uncommenting
@@ -475,4 +509,8 @@ wqx
writeLayer
www
xlsx
+xmax
+xmin
+ymax
+ymin
σ
diff --git a/inst/extdata/AKAllotments.dbf b/inst/extdata/AKAllotments.dbf
index 899479bcf..b76507b66 100644
Binary files a/inst/extdata/AKAllotments.dbf and b/inst/extdata/AKAllotments.dbf differ
diff --git a/inst/extdata/AKVillages.dbf b/inst/extdata/AKVillages.dbf
index da4f62821..01e36916c 100644
Binary files a/inst/extdata/AKVillages.dbf and b/inst/extdata/AKVillages.dbf differ
diff --git a/inst/extdata/AmericanIndian.dbf b/inst/extdata/AmericanIndian.dbf
index be591695a..763a3cc53 100644
Binary files a/inst/extdata/AmericanIndian.dbf and b/inst/extdata/AmericanIndian.dbf differ
diff --git a/inst/extdata/OKTribe.dbf b/inst/extdata/OKTribe.dbf
index 0f47f8a91..e5809ce36 100644
Binary files a/inst/extdata/OKTribe.dbf and b/inst/extdata/OKTribe.dbf differ
diff --git a/inst/extdata/OffReservation.dbf b/inst/extdata/OffReservation.dbf
index 5d5b0b48f..94e563a04 100644
Binary files a/inst/extdata/OffReservation.dbf and b/inst/extdata/OffReservation.dbf differ
diff --git a/inst/extdata/VATribe.dbf b/inst/extdata/VATribe.dbf
index da384213b..cdc0c8ddb 100644
Binary files a/inst/extdata/VATribe.dbf and b/inst/extdata/VATribe.dbf differ
diff --git a/inst/extdata/WQXCharacteristicRef.csv b/inst/extdata/WQXCharacteristicRef.csv
index 8ca079f84..ca5a1b416 100644
--- a/inst/extdata/WQXCharacteristicRef.csv
+++ b/inst/extdata/WQXCharacteristicRef.csv
@@ -4541,6 +4541,7 @@
"2,2-Dimethyldecane","Accepted",""
"2,2-Dimethylheptane","Accepted",""
"2,2-Dimethylhexane","Accepted",""
+"2,2-Dimethylpentane","Accepted",""
"2,2-Dimethylpropane","Accepted",""
"2,2-Oxybis(2-chloropropane)","Accepted",""
"2,2-Propanediamine, 1,1,1,3,3,3-hexafluoro-","Accepted",""
@@ -7006,6 +7007,7 @@
"3,5-Dimethylcyclohexene","Accepted",""
"3,5-Dimethylcyclopentene","Accepted",""
"3,5-Dimethylphenol","Accepted",""
+"3,5-Dimethylundecane","Accepted",""
"3,5-Dinitroaniline","Accepted",""
"3,5-Heptanedione, 1,1,2,2-tetrafluoro-6,6-dimethyl-","Accepted",""
"3,5-Nonanedione, 1,1,2,2-tetrafluoro-","Accepted",""
@@ -15583,6 +15585,7 @@
"Methane, dibromoiodo-","Accepted",""
"Methane, oxybis[(difluoromethoxy)difluoro-","Accepted",""
"Methane, oxybis[difluoro-","Accepted",""
+"Methaneselenonic acid","Accepted",""
"Methanesulfonamide, 1,1,1-trifluoro-N-[(trifluoromethyl)sulfonyl]-","Accepted",""
"Methanesulfonamide, 1,1,1-trifluoro-N-[(trifluoromethyl)sulfonyl]-, lithium salt","Accepted",""
"Methanesulfonic acid, 1,1-difluoro-, 2,2,3,3-tetrafluoropropyl ester","Accepted",""
@@ -22717,6 +22720,7 @@
"cis-1,3-Dimethylcyclohexane","Accepted",""
"cis-1,3-Dimethylcyclopentane","Accepted",""
"cis-1,4-Dichloro-2-butene","Accepted",""
+"cis-1,4-Dimethylcyclohexane","Accepted",""
"cis-1-Bromo-2-chlorocyclohexane","Accepted",""
"cis-1-Ethyl-3-methyl-cyclohexane","Accepted",""
"cis-1-Iodo-2-(heptafluoropropyl)cyclohexane","Accepted",""
diff --git a/inst/extdata/WQXProviderRef.csv b/inst/extdata/WQXProviderRef.csv
index ba58ae27b..bb70b254f 100644
--- a/inst/extdata/WQXProviderRef.csv
+++ b/inst/extdata/WQXProviderRef.csv
@@ -364,6 +364,7 @@
"CMCPD","Cape May County Planning Department","STORET"
"USACOEND","US Army Corps of Engineers, Nashville District","STORET"
"WETLAB","Western Environmental Testing Laboratory (Tribal)","STORET"
+"CHEROKEE","Cherokee Nation (Oklahoma)","STORET"
"TLINGIT-HAIDA","Central Council of the Tlingit & Haida Indian Tribes of Alaska (Tribal)","STORET"
"GILLIAM_SWCD","Gilliam Soil and Water Conservation District","STORET"
"NISQUALLY","Nisqually Indian Tribe (Tribal)","STORET"
@@ -537,7 +538,7 @@
"USEPA_REGION8","USEPA Region 8","STORET"
"CORNELLSCALE","Cornell University Adirondack Fisheries Research Program","STORET"
"LCWCD_WQX","Lincoln Soil and Water Conservation District (Volunteer)*","STORET"
-"AUSABLECENTER.","Ausable Freshwater Center","STORET"
+"AUSABLECENTER","Ausable Freshwater Center","STORET"
"CHNEPCHP_WQX","Charlotte Harbor National Estuaries Program (Florida)","STORET"
"PR_BEACH_WQX","PUERTO RICO ENVIRONMENTAL QUALITY BOARD (BEACH)","STORET"
"MDE_WQPR","Water Quality Protection and Restoration Program","STORET"
@@ -711,7 +712,7 @@
"SBWA1","South Branch Watershed Association (Volunteer)","STORET"
"HDRINC_WQX","HDR Incorporated","STORET"
"CHATTAHOOCHEERIVERKEEPER","Chattahoochee Riverkeeper (CRK) (Volunteer)*","STORET"
-"STORLVD_WQX","Lac Vieux Desert Band of Lake Superior Chippewa (Tribal)","STORET"
+"STORLVD_WQX","Lac Vieux Desert Band of Lake Superior Chippewa","STORET"
"NARS_WQX","EPA National Aquatic Resources Survey (NARS)","STORET"
"USFSWHITEMT","USFS White Mountain National Forest","STORET"
"RIVERSUNLIMITED","Rivers Unlimited","STORET"
@@ -776,7 +777,7 @@
"MYRWA","Mystic River Watershed Association (Massachusetts)","STORET"
"WAYNE_STATE_UNIVERSITY","WAYNE STATE UNIVERSITY","STORET"
"WEARTH_WQX","Minnesota Chippewa Tribe, Minnesota (White Earth Band) (Tribal)","STORET"
-"ALS","Americal Littoral Society (Volunteer)*","STORET"
+"ALS_WQX","Americal Littoral Society (Volunteer)*","STORET"
"CORIVWCH_WQX","Colorado River Watch","STORET"
"ASEPA_WQX","American Samoan EPA","STORET"
"MNPCA_AGW","Minnesota Pollution Control Agency - Ambient Groundwater","STORET"
@@ -1326,6 +1327,7 @@
"11FLKNMS_WQX","Florida Keys National Marine Sanctuary (Florida)","STORET"
"USACENWP_WQX","U.S. Army Corps of Engineers, Portland District","STORET"
"1143709","NEW HAMPSHIRE DEPARTMENT OF ENVIRONMENTAL SERVICES","STORET"
+"TECK_AMERICAN","Teck American Incorporated","STORET"
"NJ_MSU","Montclair State University","STORET"
"MAMIN_WQX","USEPA (Tribal)","STORET"
"CLINCHVALLEYSWCD_WQX","Clinch Valley Soil & Water Conservation District","STORET"
@@ -1349,8 +1351,8 @@
"KIOWA_WQX","Kiowa Indian Tribe of Oklahoma (Tribal)","STORET"
"PORTGRAHAM","Port Graham Village Council (Tribal)","STORET"
"DRBC","Delaware River Basin Commission","STORET"
-"MDEDAT01_WQX","Maryland Dept. of the Environment Dredging Ambient Data","STORET"
"21FLGJAX_WQX","GROUNDWORK JACKSONVILLE INC","STORET"
+"MDEDAT01_WQX","Maryland Dept. of the Environment Dredging Ambient Data","STORET"
"BLR_WQX","BLUELAKERANCHERIA_WQX (Tribal)","STORET"
"21FLSJUD_WQX","ST. JOHNS COUNTY UTILITY DEPARTMENT","STORET"
"R8MONTWQ","R8MONTWQ - EPA Region 8 Organization","STORET"
@@ -1358,7 +1360,7 @@
"SKOKDATA_WQX","Skokomish Tribe","STORET"
"ERWSD","Eagle River Water and Sanitation District","STORET"
"SQUAXIN","Squaxin Island Tribe (Tribal)","STORET"
-"TECK_AMERICAN","Teck American Incorporated","STORET"
+"EVR","EVR Operations Limited","STORET"
"COCHITIPUEBLO","Pueblo of Cochiti, New Mexico (Tribal)","STORET"
"WRWMG","Wallkill River Watershed Management Group (Volunteer)*","STORET"
"UMR","Univ of Missouri Rolla","STORET"
@@ -1462,6 +1464,7 @@
"21FLCOPC_WQX","City of Plant City","STORET"
"21NYDECA_WQX","New York State Dec Division Of Water","STORET"
"AKDNRFOR","Alaska Department of Natural Resources, Forestry","STORET"
+"KAWNATON","Kaw Nation of Oklahoma","STORET"
"21FLALTA_WQX","CITY OF ALTAMONTE SPRINGS","STORET"
"MIDEWIN","Midewin National Tallgrass Prairie","STORET"
"DRMPWQX","Mescalero Apache Tribe DRMP","STORET"
@@ -1562,8 +1565,6 @@
"21FLGTM","Guana Tolomato Matanzas (GTM) Esturarine (NERR - Florida)","STORET"
"HANALEI","Hanalei Watershed Study (Region 9) - California","STORET"
"PR_RIVER","Puerto Rico Environmental Quality Board (Rivers)","STORET"
-"KAWNATON","Kaw Nation of Oklahoma","STORET"
-"CHEROKEE","Cherokee Nation (Oklahoma)","STORET"
"ALASSWCD","Alaska Soil and Water Conservation District","STORET"
"21FLCPSL","City of Port St. Lucie (Florida)","STORET"
"STANDARD","Region 8 Superfund: Standard Mine","STORET"
diff --git a/man/TADA_AnalysisDataFilter.Rd b/man/TADA_AnalysisDataFilter.Rd
index 5c4cd5211..0abe25a6a 100644
--- a/man/TADA_AnalysisDataFilter.Rd
+++ b/man/TADA_AnalysisDataFilter.Rd
@@ -51,10 +51,11 @@ that all results not flagged for use in the analysis workflow will be removed
and the TADA.UseForAnalysis.Flag column will not be added.
}
\details{
-It uses ActivityMediaSubdivisionName, AquiferName,
+It uses MonitoringLocationTypeName, ActivityMediaName, ActivityMediaSubdivisionName,
+AquiferName,
LocalAqfrName, ConstructionDateText, WellDepthMeasure.MeasureValue,
WellDepthMeasure.MeasureUnitCode, WellHoleDepthMeasure.MeasureValue, and
-WellHoleDepthMeasure.MeasureUnitCode to identify groundwater samples. Users
+WellHoleDepthMeasure.MeasureUnitCode to identify samples. Users
can select whether sediment, groundwater and/or surface water should be included.
An additional column, TADA.UseForAnalysis.Flag, specifies whether each row should
be included in the analysis workflow and why. Setting clean = TRUE, means
diff --git a/man/TADA_DataRetrieval.Rd b/man/TADA_DataRetrieval.Rd
index 76062bac8..c64a5f66a 100644
--- a/man/TADA_DataRetrieval.Rd
+++ b/man/TADA_DataRetrieval.Rd
@@ -22,6 +22,7 @@ TADA_DataRetrieval(
organization = "null",
project = "null",
providers = "null",
+ bBox = "null",
maxrecs = 350000,
ask = TRUE,
applyautoclean = TRUE
@@ -87,6 +88,8 @@ See https://www.waterqualitydata.us/Codes/project for options.}
\item{providers}{Leave blank to include all, or specify "STEWARDS", "STORET" (i.e., WQX), and/or
"NWIS". See https://www.waterqualitydata.us/Codes/providers for options.}
+\item{bBox}{The latitude and longitude extent. Includes four numbers, e.g. bBox <- c(-xmin, ymin, -xmax, ymax).}
+
\item{maxrecs}{Maximum number of records to query at once (i.e., without breaking into smaller
queries).}
@@ -213,6 +216,9 @@ tada7 <- TADA_DataRetrieval(
endDate = "2023-12-31",
ask = FALSE
)
+
+bbox <- c(-86.9736, 34.4883, -86.6135, 34.6562)
+tada8 <- TADA_DataRetrieval(bBox = bbox)
}
}
diff --git a/man/TADA_GetATTAINS.Rd b/man/TADA_GetATTAINS.Rd
index edd119fde..de145829a 100644
--- a/man/TADA_GetATTAINS.Rd
+++ b/man/TADA_GetATTAINS.Rd
@@ -105,7 +105,7 @@ tada_data <- TADA_DataRetrieval(
ask = FALSE
)
-# note: these example ATTAINS data retrieval queries below may take a long
+# note: these example ATTAINS data retrieval queries below may take a long
# time (10+ minutes) to run
tada_attains <- TADA_GetATTAINS(tada_data,
fill_catchments = FALSE,
diff --git a/man/TADA_getNWIS.Rd b/man/TADA_getNWIS.Rd
index 574d28699..800b44311 100644
--- a/man/TADA_getNWIS.Rd
+++ b/man/TADA_getNWIS.Rd
@@ -56,16 +56,16 @@ locs_sf <- sf::read_sf("inst/extdata/AmericanIndian.shp") \%>\%
dplyr::filter(NAME \%in\% c("Spokane", "Navajo Nation"))
sites_aoi_sf <- TADA_getNWIS(
aoi_sf = locs_sf,
- parameter_codes =
- c("00060", "00010"),
- start_date = "2020-01-01",
- end_date = "2020-01-31"
+ parameter_codes =
+ c("00060", "00010"),
+ start_date = "2020-01-01",
+ end_date = "2020-01-31"
)
# Example 2: Query by specific site numbers
sites_specific <- TADA_getNWIS(
siteid = c("11530500", "11532500"),
- parameter_codes = c("00060", "00010"),
+ parameter_codes = c("00060", "00010"),
start_date = "2020-01-01",
end_date = "2020-12-31"
)
@@ -74,7 +74,7 @@ sites_specific <- TADA_getNWIS(
nwis_data <- TADA_getNWIS(
statecode = c("RI", "CO"),
stat_codes = c("00001"),
- parameter_codes = c("00010"),
+ parameter_codes = c("00010"),
start_date = "2020-01-01",
end_date = "2020-01-02"
)
diff --git a/man/cybertown.Rd b/man/cybertown.Rd
new file mode 100644
index 000000000..1c9eb316b
--- /dev/null
+++ b/man/cybertown.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ExampleData.R
+\docType{data}
+\name{cybertown}
+\alias{cybertown}
+\title{cybertown}
+\format{
+A data frame with 990 rows and 150 variables
+}
+\usage{
+data(cybertown)
+}
+\description{
+See TADACybertown2025.Rmd in vignettes folder.
+}
+\keyword{data}
+\keyword{frame}
diff --git a/vignettes/GeospatialDataIntegration.Rmd b/vignettes/GeospatialDataIntegration.Rmd
index 900d06dc3..e8262ebab 100644
--- a/vignettes/GeospatialDataIntegration.Rmd
+++ b/vignettes/GeospatialDataIntegration.Rmd
@@ -114,15 +114,17 @@ library(EPATADA)
```
```{r usersetup, eval = F, results = 'hide'}
-remotes::install_github("USEPA/EPATADA",
- ref = "develop",
- dependencies = TRUE,
- force = TRUE)
+remotes::install_github("USEPA/EPATADA",
+ ref = "develop",
+ dependencies = TRUE,
+ force = TRUE
+)
-remotes::install_github("USEPA/StreamCatTools",
- ref = "master",
- dependencies = TRUE,
- force = TRUE)
+remotes::install_github("USEPA/StreamCatTools",
+ ref = "master",
+ dependencies = TRUE,
+ force = TRUE
+)
library(EPATADA)
library(StreamCatTools)
@@ -183,8 +185,7 @@ characters in value columns, (3) converts latitude and longitude values
to numeric, (4) replaces "meters" with "m", (5) replaces deprecated
characteristic names with current WQX names, (6) harmonizes result and
detection limit units to WQX, TADA or user supplied target units, (7)
-converts depths to meters, and (8) creates the column TADA.ComparableID
-by concatenating characteristic name, result sample fraction, method
+converts depths to meters, and (8) creates the column TADA.ComparableDataIdentifier by concatenating characteristic name, result sample fraction, method
speciation, and result measure unit.
Now, let's use EPATADA functions to review, visualize, and whittle the
@@ -212,7 +213,7 @@ GreenBay_FoxRiver <- dplyr::filter(GreenBay_FoxRiver, TADA.SingleOrgDup.Flag ==
#### **Nondetects**
TADA provides some simple methods for dealing with censored results,
-such as using multiplying the detection limit by a user supplied value
+such as multiplying the detection limit by a user supplied value
or leaving the result as is.
```{r censored}
@@ -229,7 +230,7 @@ out these duplicates can prevent issues in analysis.
# find potential dups multiple orgs
GreenBay_FoxRiver <- TADA_FindPotentialDuplicatesMultipleOrgs(GreenBay_FoxRiver)
-# filter out
+# filter out
GreenBay_FoxRiver <- dplyr::filter(GreenBay_FoxRiver, TADA.ResultSelectedMultipleOrgs == "Y")
```
@@ -294,11 +295,13 @@ Filter to focus on frequently monitored characteristics in example data
```{r subset}
GreenBay_FoxRiver_Subset <- GreenBay_FoxRiver %>%
dplyr::filter(TADA.ComparableDataIdentifier %in%
- c("SPECIFIC CONDUCTANCE_NA_NA_US/CM",
- "PH_NA_NA_NA",
+ c(
+ "SPECIFIC CONDUCTANCE_NA_NA_US/CM",
+ "PH_NA_NA_NA",
"TOTAL NITROGEN, MIXED FORMS_UNFILTERED_AS N_MG/L",
"TOTAL PHOSPHORUS, MIXED FORMS_UNFILTERED_AS P_UG/L",
- "DISSOLVED OXYGEN (DO)_NA_NA_MG/L"))
+ "DISSOLVED OXYGEN (DO)_NA_NA_MG/L"
+ ))
```
Review organizations for subset
@@ -321,7 +324,7 @@ DT::datatable(GreenBay_FoxRiver_Subset_Stats, fillContainer = TRUE)
Generate scatterplot
```{r scatterplot}
-TADA_TwoCharacteristicScatterplot(GreenBay_FoxRiver_Subset, id_cols = "TADA.ComparableDataIdentifier", groups = c("TOTAL PHOSPHORUS, MIXED FORMS_UNFILTERED_AS P_UG/L", "TOTAL NITROGEN, MIXED FORMS_UNFILTERED_AS N_MG/L"))
+TADA_TwoCharacteristicScatterplot(GreenBay_FoxRiver_Subset, id_cols = "TADA.ComparableDataIdentifier", groups = c("TOTAL PHOSPHORUS, MIXED FORMS_UNFILTERED_AS P_UG/L", "TOTAL NITROGEN, MIXED FORMS_UNFILTERED_AS N_MG/L"))
```
Generate map
@@ -334,13 +337,13 @@ TADA_OverviewMap(GreenBay_FoxRiver_Subset)
```{r Coordinate issues}
# Change coordinate sign if appropriate
-GreenBay_FoxRiver = TADA_FlagCoordinates(GreenBay_FoxRiver_Subset, clean_outsideUSA = "change sign", clean_imprecise = FALSE)
+GreenBay_FoxRiver <- TADA_FlagCoordinates(GreenBay_FoxRiver_Subset, clean_outsideUSA = "change sign", clean_imprecise = FALSE)
-# This df has NA lons from USGS that must be addressed before TADA_MakeSpatial can be run...
+# This df has NA lons from USGS that must be addressed before TADA_MakeSpatial can be run...
sum(is.na(GreenBay_FoxRiver_Subset$LongitudeMeasure))
# Remove rows with NA lons from df
-GreenBay_FoxRiver_Subset <- GreenBay_FoxRiver_Subset[!is.na(GreenBay_FoxRiver_Subset$LongitudeMeasure),]
+GreenBay_FoxRiver_Subset <- GreenBay_FoxRiver_Subset[!is.na(GreenBay_FoxRiver_Subset$LongitudeMeasure), ]
# Recheck
sum(is.na(GreenBay_FoxRiver_Subset$LongitudeMeasure))
@@ -357,7 +360,7 @@ First, leverage TADA_MakeSpatial to transform a WQP dataframe into a
geospatial sf object.
```{r}
-GreenBay_FoxRiver_sf = TADA_MakeSpatial(GreenBay_FoxRiver_Subset)
+GreenBay_FoxRiver_sf <- TADA_MakeSpatial(GreenBay_FoxRiver_Subset)
```
Then create a unique identifier based on shared lat long values and
@@ -366,8 +369,8 @@ filter to just the 25 unique locations.
```{r}
GreenBay_FoxRiver_sf$latlon <- paste0(GreenBay_FoxRiver_sf$TADA.LongitudeMeasure, GreenBay_FoxRiver_sf$TADA.LatitudeMeasure)
-GreenBay_FoxRiver_sf <- GreenBay_FoxRiver_sf |>
- dplyr::group_by(latlon) |>
+GreenBay_FoxRiver_sf <- GreenBay_FoxRiver_sf |>
+ dplyr::group_by(latlon) |>
dplyr::mutate(loc_id = dplyr::cur_group_id())
GreenBay_FoxRiver_sf_locs <- GreenBay_FoxRiver_sf |>
@@ -384,7 +387,7 @@ GreenBay_FoxRiver_sf_locs$COMID <- as.integer(strsplit(StreamCatTools::sc_get_co
nhdplus_data <- nhdplusTools::subset_nhdplus(GreenBay_FoxRiver_sf_locs$COMID, nhdplus_data = "download")
-outlet <- dplyr::filter(nhdplus_data$NHDFlowline_Network, hydroseq == min(hydroseq))
+outlet <- dplyr::filter(nhdplus_data$NHDFlowline_Network, hydroseq == min(hydroseq))
nhdplusTools::plot_nhdplus(bbox = sf::st_bbox(outlet))
plot(sf::st_transform(sf::st_geometry(GreenBay_FoxRiver_sf_locs), 3857), add = TRUE)
@@ -400,14 +403,16 @@ subsets all of the NHDPlus.
all_network <- dataRetrieval::findNLDI(comid = outlet$comid, nav = "UT", distance_km = 500)
# we could select only comids on network
-if(FALSE) # don't run this one
-nhdplus_data <- nhdplusTools::subset_nhdplus(comids = as.integer(all_network$UT_flowlines$nhdplus_comid), nhdplus_data = "download", flowline_only = FALSE)
+if (FALSE) { # don't run this one
+ nhdplus_data <- nhdplusTools::subset_nhdplus(comids = as.integer(all_network$UT_flowlines$nhdplus_comid), nhdplus_data = "download", flowline_only = FALSE)
+}
# or we could just get everything in the bbox to be sure we get non-network stuff too!
nhdplus_data <- nhdplusTools::subset_nhdplus(
- bbox = sf::st_bbox(all_network$UT_flowlines),
- nhdplus_data = "download",
- flowline_only = FALSE)
+ bbox = sf::st_bbox(all_network$UT_flowlines),
+ nhdplus_data = "download",
+ flowline_only = FALSE
+)
# see ?nhdplusTools::subset_nhdplus for lots more options!
@@ -429,16 +434,20 @@ more!](https://doi-usgs.github.io/hydroloom/articles/hydroloom.html)
```{r}
GreenBay_FoxRiver_sf_locs <- sf::st_join(
- GreenBay_FoxRiver_sf_locs,
- hydroloom::st_compatibalize(dplyr::select(nhdplus_data$CatchmentSP, featureid),
- GreenBay_FoxRiver_sf_locs))
+ GreenBay_FoxRiver_sf_locs,
+ hydroloom::st_compatibalize(
+ dplyr::select(nhdplus_data$CatchmentSP, featureid),
+ GreenBay_FoxRiver_sf_locs
+ )
+)
# NOTE that featureid and comid are the same!!
all(GreenBay_FoxRiver_sf_locs$COMID == GreenBay_FoxRiver_sf_locs$featureid)
(linear_references <- hydroloom::index_points_to_lines(
nhdplus_data$NHDFlowline_Network,
- GreenBay_FoxRiver_sf_locs))
+ GreenBay_FoxRiver_sf_locs
+))
GreenBay_FoxRiver_sf_locs <- dplyr::bind_cols(GreenBay_FoxRiver_sf_locs, linear_references)
```
@@ -449,24 +458,33 @@ For on-network waterbodies, it will also include the outlet flowline for
each waterbody.
```{r}
-all_wb <- dplyr::bind_rows(dplyr::select(nhdplus_data$NHDWaterbody, wbid = comid),
- dplyr::select(nhdplus_data$NHDArea, wbid = comid))
+all_wb <- dplyr::bind_rows(
+ dplyr::select(nhdplus_data$NHDWaterbody, wbid = comid),
+ dplyr::select(nhdplus_data$NHDArea, wbid = comid)
+)
(waterbody_indexes <- hydroloom::index_points_to_waterbodies(
- sf::st_transform(all_wb, 5070),
- GreenBay_FoxRiver_sf_locs,
- flines = nhdplus_data$NHDFlowline_Network,
- search_radius = units::as_units(1000, "m")))
+ sf::st_transform(all_wb, 5070),
+ GreenBay_FoxRiver_sf_locs,
+ flines = nhdplus_data$NHDFlowline_Network,
+ search_radius = units::as_units(1000, "m")
+))
```
```{r}
-par(mar=c(0,0,0,0))
-nhdplusTools::plot_nhdplus(bbox = sf::st_bbox(GreenBay_FoxRiver_sf),
- cache_data = tempfile(fileext = ".rds"))
-plot(sf::st_transform(all_wb[all_wb$wbid %in% waterbody_indexes$near_wbid,],
- 3857),
- add = TRUE,
- col = "darkblue", border = NA)
+par(mar = c(0, 0, 0, 0))
+nhdplusTools::plot_nhdplus(
+ bbox = sf::st_bbox(GreenBay_FoxRiver_sf),
+ cache_data = tempfile(fileext = ".rds")
+)
+plot(
+ sf::st_transform(
+ all_wb[all_wb$wbid %in% waterbody_indexes$near_wbid, ],
+ 3857
+ ),
+ add = TRUE,
+ col = "darkblue", border = NA
+)
plot(sf::st_transform(sf::st_geometry(GreenBay_FoxRiver_sf_locs), 3857), add = TRUE, col = "white")
```
@@ -486,8 +504,8 @@ watershed for each particular site using
**Discover what StreamCat metrics we might want to use**
```{r}
-metrics <- StreamCatTools::sc_get_params(param = 'metric_names')
-print(paste0('A selection of available StreamCat metrics include: ',paste(metrics[1:10],collapse = ', ')))
+metrics <- StreamCatTools::sc_get_params(param = "metric_names")
+print(paste0("A selection of available StreamCat metrics include: ", paste(metrics[1:10], collapse = ", ")))
```
**Discover land cover of watersheds for sites**
@@ -496,19 +514,19 @@ We'll pull in all the NLCD categories at the local catchment level for
each location
```{r}
-GB_FR_NLCD <- StreamCatTools::sc_nlcd(year='2019', aoi='cat', comid=GreenBay_FoxRiver_sf_locs$COMID)
+GB_FR_NLCD <- StreamCatTools::sc_nlcd(year = "2019", aoi = "cat", comid = GreenBay_FoxRiver_sf_locs$COMID)
-GB_FR_Urb <- GB_FR_NLCD |>
- dplyr::mutate(Pct_Urbanized = pcturbop2019cat+pcturbmd2019cat+pcturblo2019cat+pcturbhi2019cat) |>
- dplyr::select(comid,Pct_Urbanized)
+GB_FR_Urb <- GB_FR_NLCD |>
+ dplyr::mutate(Pct_Urbanized = pcturbop2019cat + pcturbmd2019cat + pcturblo2019cat + pcturbhi2019cat) |>
+ dplyr::select(comid, Pct_Urbanized)
GB_FR_Urb
```
**Visualize urbanization for local catchment for each location**
```{r}
-ggplot2::ggplot(GB_FR_Urb, ggplot2::aes(x=Pct_Urbanized)) +
+ggplot2::ggplot(GB_FR_Urb, ggplot2::aes(x = Pct_Urbanized)) +
ggplot2::geom_density()
```
@@ -518,7 +536,7 @@ Now we'll just demonstrate pulling in watershed data that we might use
in a modeling exercise as spatial covariates
```{r}
-ws_data <- StreamCatTools::sc_get_data(metric='fert,nsurp,nani,manure,IWI', aoi='cat,ws', comid=GreenBay_FoxRiver_sf_locs$COMID)
+ws_data <- StreamCatTools::sc_get_data(metric = "fert,nsurp,nani,manure,IWI", aoi = "cat,ws", comid = GreenBay_FoxRiver_sf_locs$COMID)
```
# Building statistical models
diff --git a/vignettes/TADAAssessmentUnitUseCase.Rmd b/vignettes/TADAAssessmentUnitUseCase.Rmd
index c19fad0a1..857e049a0 100644
--- a/vignettes/TADAAssessmentUnitUseCase.Rmd
+++ b/vignettes/TADAAssessmentUnitUseCase.Rmd
@@ -86,7 +86,7 @@ remotes::install_github("USEPA/EPATADA",
```{r install_TADA_developer, include = FALSE}
remotes::install_github("USEPA/EPATADA",
- ref = "594-tada_insertbreak-bug-report",
+ ref = "cybertown2025",
dependencies = TRUE
)
library(EPATADA)
diff --git a/vignettes/TADACybertown2025.Rmd b/vignettes/TADACybertown2025.Rmd
new file mode 100644
index 000000000..7b5e4eb68
--- /dev/null
+++ b/vignettes/TADACybertown2025.Rmd
@@ -0,0 +1,584 @@
+---
+title: 'TADA Cybertown Workshop June 2025'
+format: html
+editor: visual
+author: "TADA Team"
+date: "`r Sys.Date()`"
+always_allow_html: true
+output:
+ rmarkdown::html_vignette:
+ toc: true
+ fig_caption: yes
+ fig_height: 8
+ fig_width: 8
+vignette: >
+ %\VignetteEncoding{UTF-8}
+ %\VignetteIndexEntry{TADA Cybertown Workshop June 2025}
+ %\VignetteEngine{knitr::rmarkdown}
+description: An introduction to using the EPATADA R package to retrieve, clean, analyze, and visualize WQP data.
+editor_options:
+ chunk_output_type: console
+ markdown:
+ wrap: 72
+---
+
+```{r setup, include = FALSE}
+knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
+```
+
+## Install
+
+First, install and load the remotes package specifying the repo. This is
+needed before installing the *EPATADA* R package because it is only
+available on GitHub.
+
+```{r remotes, results = 'hide', eval = F}
+install.packages("remotes", repos = "http://cran.us.r-project.org")
+library(remotes)
+```
+
+```{css, include = F}
+pre {
+ max-height: 300px;
+ overflow-y: auto;
+}
+
+pre[class] {
+ max-height: 300px;
+}
+```
+
+Next, install (or update) and load the *EPATADA* R package using the
+*remotes* R package. Additional dependency R packages that are used
+within *EPATADA* will be downloaded automatically. You may be prompted
+in the console to update dependency packages that have more recent
+versions available. If you see this prompt, it is recommended to update
+all of them (enter 1 into the console). Our team is actively developing
+*EPATADA*, therefore we highly recommend that you update the package
+(and all of its dependencies) each time you use it.
+
+```{r install, eval = F, results = 'hide'}
+remotes::install_github("USEPA/EPATADA", ref = "develop", dependencies = TRUE)
+library(EPATADA)
+```
+
+```{r install_dev, eval = T, include = F}
+remotes::install_github("USEPA/EPATADA", ref = "cybertown2025", dependencies = TRUE)
+library(EPATADA)
+```
+
+## Record start time
+
+```{r}
+start.time <- Sys.time()
+```
+
+## Retrieve
+
+Query the WQP using TADA_DataRetrieval. TADA_AutoClean is a powerful
+function that runs as part of TADA_DataRetrieval when applyautoclean =
+TRUE. It performs a variety of tasks, for example:
+
+1. creates new "TADA" prefixed columns and capitalizes their
+ contents to reduce case sensitivity issues,
+
+2. converts special characters in value columns,
+
+3. converts latitude and longitude values to numeric,
+
+4. replaces "meters" with "m",
+
+5. replaces deprecated characteristic names with current WQX names,
+
+6. harmonizes result and detection limit units,
+
+7. converts depths to meters, and
+
+8. creates the column TADA.ComparableDataIdentifier by concatenating
+ characteristic name, result sample fraction, method speciation, and
+ result measure unit.
+
+In this example, we will first leverage How's My Waterway (HMW) and the
+ATTAINS geospatial services to find and load an ATTAINS Assessment Unit
+ID and shapefile (only works for polygons for now). We will query the
+ATTAINS geospatial services using the Assessment Unit ID found on HMW
+(see [example
+here](https://mywaterway.epa.gov/waterbody-report/CT_DEP01/CT6400-00-1-L5_01/2022)).
+Then we will use the shapefile as our input for the new aoi_sf query
+option included in TADA_DataRetrieval. This allows us to download WQP
+data within the Assessment Unit (our area of interest/AOI).
+
+```{r TADA_DataRetrieval}
+query.params <- list(
+ where = "assessmentunitidentifier IN ('CT6400-00-1-L5_01')",
+ outFields = "*",
+ f = "geojson"
+)
+
+url <- "https://gispub.epa.gov/arcgis/rest/services/OW/ATTAINS_Assessment/MapServer/2/query?"
+
+poly.response <- httr::GET(url, query = query.params)
+
+poly.geojson <- httr::content(poly.response, as = "text", encoding = "UTF-8")
+
+poly.sf <- sf::st_read(poly.geojson, quiet = TRUE)
+
+WQP_raw <- TADA_DataRetrieval(
+ aoi_sf = poly.sf,
+ applyautoclean = TRUE,
+ ask = FALSE
+)
+
+# # For demo purposes, we pre-downloaded this example data
+# WQP_raw <- cybertown
+```
+
+Remove intermediate variables in R by using 'rm()'.
+
+```{r}
+rm(poly.response, poly.sf, query.params, poly.geojson, url)
+```
+
+## Flag, clean, and visualize
+
+Now, let's use EPATADA functions to review, visualize, and whittle the
+returned WQP data down to include only results that are applicable to
+our water quality analysis and area of interest.
+
+The **TADA_AnalysisDataFilter** function can assist in identifying and
+filtering surface water, groundwater, and sediment results. If you set
+clean = FALSE, this function will categorize and flag (but not remove)
+rows in a new *TADA.UseForAnalysis.Flag* column for review. However, the
+default functionality (clean = TRUE) is to include surface water and
+exclude groundwater and sediment results.
+
+```{r TADA_AnalysisDataFilter}
+WQP_flag <- TADA_AnalysisDataFilter(
+ WQP_raw,
+ clean = FALSE,
+ surface_water = TRUE,
+ ground_water = FALSE,
+ sediment = FALSE
+)
+
+# Review unique flags
+unique(WQP_flag$TADA.UseForAnalysis.Flag)
+
+# Review flagged rows
+WQP_flag_review <- WQP_flag %>%
+ dplyr::filter(TADA.UseForAnalysis.Flag == "No - NA") %>%
+ dplyr::select(c("TADA.UseForAnalysis.Flag", "ActivityMediaName", "ActivityMediaSubdivisionName", "AquiferName", "LocalAqfrName", "ConstructionDateText", "WellDepthMeasure.MeasureValue", "WellDepthMeasure.MeasureUnitCode", "WellHoleDepthMeasure.MeasureValue", "WellHoleDepthMeasure.MeasureUnitCode"))
+
+# Keep rows that are NOT flagged as sediment (keep SW and NA)
+WQP_clean <- WQP_flag %>%
+ dplyr::filter(TADA.UseForAnalysis.Flag != "No - SEDIMENT")
+```
+
+Create an overview map.
+
+```{r TADA_OverviewMap}
+TADA_OverviewMap(WQP_clean)
+```
+
+Let's take a quick look at all unique values in the
+MonitoringLocationIdentifier column and see how many results are
+associated with each.
+
+```{r TADA_FieldValuesTable}
+# use TADA_FieldValuesTable to create a table of the number of results per MonitoringLocationIdentifier
+sites <- TADA_FieldValuesTable(WQP_clean, field = "MonitoringLocationIdentifier")
+
+DT::datatable(sites, fillContainer = TRUE)
+```
+
+Are there sites located within 100 meters of each other?
+
+```{r TADA_FlagCoordinates}
+WQP_clean <- TADA_FindNearbySites(WQP_clean)
+
+TADA_NearbySitesMap(WQP_clean)
+```
+
+Now let's review all unique values in the TADA.ComparableDataIdentifier
+column and see how many results are associated with each.
+TADA.ComparableDataIdentifier concatenates TADA.CharacteristicName,
+TADA.ResultSampleFractionText, TADA.MethodSpeciationName, and
+TADA.ResultMeasure.MeasureUnitCode.
+
+```{r TADA_FieldValuesTable2}
+# use TADA_FieldValuesTable to create a table of the number of results per TADA.ComparableDataIdentifier
+chars <- TADA_FieldValuesTable(WQP_clean, field = "TADA.ComparableDataIdentifier")
+
+DT::datatable(chars, fillContainer = TRUE)
+```
+
+Remove intermediate variables in R by using 'rm()'.
+
+```{r}
+rm(chars, sites, WQP_flag_review, WQP_flag)
+```
+
+Next, let's check if the dataset contains potential duplicate results
+from within a single organization or from within multiple organizations
+(such as when two or more organizations monitor the same location and
+may submit duplicate results).
+
+If you would like to prioritize results from one organization over
+another, this can be done using the org_hierarchy argument in
+`TADA_FindPotentialDuplicatesMultipleOrgs`.
+
+```{r duplicates}
+# find duplicates from single org
+WQP_flag <- TADA_FindPotentialDuplicatesSingleOrg(WQP_clean)
+
+# Review organizations. You can select one to prioritize in TADA_FindPotentialDuplicatesMultipleOrgs
+unique(WQP_flag$OrganizationIdentifier)
+unique(WQP_flag$OrganizationFormalName)
+
+# find duplicates across multiple orgs
+WQP_flag <- TADA_FindPotentialDuplicatesMultipleOrgs(
+ WQP_flag,
+ org_hierarchy = c("CT_DEP01", "USGS-CT", "CTVOLMON", "NALMS")
+)
+```
+
+Let's review the duplicates:
+
+```{r review}
+WQP_flag_review <- WQP_flag %>%
+ dplyr::select(
+ MonitoringLocationName,
+ TADA.MonitoringLocationIdentifier,
+ # TADA.MultipleOrgDuplicate,
+ # TADA.MultipleOrgDupGroupID,
+ # TADA.ResultSelectedMultipleOrgs,
+ TADA.SingleOrgDupGroupID,
+ TADA.SingleOrgDup.Flag,
+ TADA.ComparableDataIdentifier,
+ ResultMeasureValue,
+ TADA.ResultMeasure.MeasureUnitCode,
+ TADA.MonitoringLocationName,
+ TADA.NearbySites.Flag,
+ TADA.NearbySiteGroup,
+ OrganizationIdentifier
+ ) %>%
+ dplyr::filter(TADA.SingleOrgDupGroupID != "Not a duplicate") %>%
+ dplyr::distinct()
+```
+
+We will select to keep only unique samples from
+`TADA_FindPotentialDuplicatesSingleOrg` by filtering for
+TADA.SingleOrgDup.Flag equals "Unique".
+
+There are no multiple org duplicates from
+`TADA_FindPotentialDuplicatesMultipleOrgs` in this example, but if there
+were, duplicates can be removed by filtering for
+TADA.ResultSelectedMultipleOrgs equals "Y".
+
+```{r filter}
+WQP_clean <- WQP_flag %>%
+ dplyr::filter(TADA.SingleOrgDup.Flag == "Unique") %>%
+ dplyr::filter(TADA.ResultSelectedMultipleOrgs == "Y")
+```
+
+Remove intermediate variables in R by using 'rm()'. In the remainder of
+this workshop, we will work with the clean dataset.
+
+```{r}
+rm(WQP_flag, WQP_flag_review)
+```
+
+Censored data are measurements for which the true value is not known,
+but we can estimate the value based on known lower or upper detection
+conditions and limit types. TADA fills missing *TADA.ResultMeasureValue*
+and *TADA.ResultMeasure.MeasureUnitCode* values with values and units
+from *TADA.DetectionQuantitationLimitMeasure.MeasureValue* and
+*TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode*, respectively,
+using the `TADA_AutoClean` function.
+
+The TADA package currently has functions that summarize censored data
+incidence in the dataset and perform simple substitutions of censored
+data values, including x times the detection limit and random selection
+of a value between 0 and the detection limit. The user may specify the
+methods used for non-detects and over-detects separately in the input to
+the `TADA_SimpleCensoredMethods` function. The next step we take in this
+example is to perform simple conversions to the censored data in the
+dataset: we keep over-detects as is (no conversion made) and convert
+non-detect values to 0.5 times the detection limit (half the detection
+limit).
+
+```{r censored}
+WQP_clean <- TADA_SimpleCensoredMethods(
+ WQP_clean,
+ nd_method = "multiplier",
+ nd_multiplier = 0.5,
+ od_method = "as-is",
+ od_multiplier = "null"
+)
+```
+
+`TADA_AutoFilter` removes rows where the result value is not numeric to
+prepare a dataframe for quantitative analyses. Specifically, this
+function removes rows with "Text" and "NA - Not Available" in the
+TADA.ResultMeasureValueDataTypes.Flag column, or NA in the
+TADA.ResultMeasureValue column. In addition, this function removes
+results with QA/QC ActivityTypeCode's. This function also removes any
+columns not required for TADA workflow where all values are equal to NA.
+
+```{r autofilter}
+WQP_clean <- TADA_AutoFilter(WQP_clean)
+```
+
+TADA_RunKeyFlagFunctions is a shortcut function to run important TADA
+flagging functions. See ?function documentation for TADA_FlagResultUnit,
+TADA_FlagFraction, TADA_FindQCActivities, TADA_FlagMeasureQualifierCode,
+and TADA_FlagSpeciation for more information.
+
+```{r TADA_RunKeyFlagFunctions}
+WQP_clean <- TADA_RunKeyFlagFunctions(
+ WQP_clean,
+ clean = TRUE
+)
+```
+
+Another set of TADA flagging functions, `TADA_FlagAboveThreshold` and
+`TADA_FlagBelowThreshold`, can be used to check results against national
+lower and upper thresholds. For these, we will set clean = FALSE and
+flaggedonly = TRUE so that it returns only flagged results in the review
+dataframe returned. We will keep these in our "clean" dataframe for now.
+
+```{r thresholds}
+WQP_flag_reviewabove <- TADA_FlagAboveThreshold(WQP_clean, clean = FALSE, flaggedonly = TRUE)
+
+WQP_flag_reviewbelow <- TADA_FlagBelowThreshold(WQP_clean, clean = FALSE, flaggedonly = TRUE)
+```
+
+Remove intermediate variables.
+
+```{r}
+rm(WQP_flag_reviewabove, WQP_flag_reviewbelow)
+```
+
+Let's take another look at all unique values in the
+TADA.ComparableDataIdentifier column and see how many results are
+associated with each. TADA.ComparableDataIdentifier concatenates
+TADA.CharacteristicName, TADA.ResultSampleFractionText,
+TADA.MethodSpeciationName, and TADA.ResultMeasure.MeasureUnitCode.
+
+```{r TADA_FieldValuesTable3}
+# use TADA_FieldValuesTable to create a table of the number of results per TADA.ComparableDataIdentifier
+chars <- TADA_FieldValuesTable(WQP_clean, field = "TADA.ComparableDataIdentifier")
+
+chars_before <- unique(WQP_clean$TADA.ComparableDataIdentifier)
+
+DT::datatable(chars, fillContainer = TRUE)
+```
+
+Scroll through the table and check to see if there are any synonyms. It may
+be possible that some of these can be automatically harmonized using
+`TADA_HarmonizeSynonyms` so their results can be directly compared.
+
+Let's give it a try.
+
+```{r}
+WQP_clean <- TADA_HarmonizeSynonyms(WQP_clean)
+```
+
+How many unique TADA.ComparableDataIdentifier's do we have now? In this
+example, there were no synonyms.
+
+```{r}
+chars_after <- unique(WQP_clean$TADA.ComparableDataIdentifier)
+```
+
+Remove intermediate variables.
+
+```{r}
+rm(chars_before, chars_after)
+```
+
+Create a pie chart.
+
+```{r}
+TADA_FieldValuesPie(WQP_clean, field = "TADA.CharacteristicName")
+```
+
+## Select characteristic
+
+Let's filter the data and focus on one characteristic of interest.
+
+```{r}
+# Select characteristics of interest
+WQP_clean_subset <- WQP_clean %>%
+ dplyr::filter(TADA.CharacteristicName %in% "ESCHERICHIA COLI")
+```
+
+Remove intermediate variables. We will focus on the subset from now on.
+
+```{r}
+rm(WQP_clean, chars)
+```
+
+## Integrate ATTAINS and map
+
+In this section, we will associate geospatial data from **ATTAINS** with
+the **WQP** data, and filter the dataset to retain only results that
+were collected in specified Assessment Unit(s). We can also generate a
+new table to give us some information about the individual monitoring
+locations within the assessment unit(s).
+
+- TADA_GetATTAINS() automates matching of WQP monitoring locations
+ with ATTAINS assessment units that fall within (intersect) the same
+ NHDPlus catchment
+ ([details](https://usepa.github.io/EPATADA/articles/TADAModule2.html))
+- The function uses high resolution NHDPlus catchments by default
+ because 80% of state submitted assessment units in ATTAINS were
+ developed based on high res NHD; users can select med-res if
+ applicable to their use case
+
+```{r Data Retrieval - Geospatial}
+WQP_clean_subset_spatial <- TADA_GetATTAINS(
+ WQP_clean_subset,
+ fill_catchments = FALSE,
+ return_sf = TRUE,
+ return_nearest = TRUE
+)
+
+# Adds ATTAINS info to df
+WQP_clean_subset <- WQP_clean_subset_spatial$TADA_with_ATTAINS
+```
+
+View catchments and assessment units on map
+
+```{r TADA_ViewATTAINS}
+ATTAINS_map <- TADA_ViewATTAINS(WQP_clean_subset_spatial)
+
+ATTAINS_map
+```
+
+Remove intermediate variables:
+
+```{r}
+rm(ATTAINS_map)
+```
+
+Create table of monitoring location identifiers and AUs.
+
+```{r}
+ML_AU_crosswalk <- WQP_clean_subset %>%
+ dplyr::select(TADA.MonitoringLocationIdentifier, ATTAINS.assessmentunitidentifier, ATTAINS.assessmentunitname, TADA.CharacteristicName) %>%
+ dplyr::distinct()
+```
+
+Remove intermediate variables. Let's keep going with WQP_clean_subset.
+
+```{r}
+rm(ML_AU_crosswalk, WQP_clean_subset_spatial)
+```
+
+`TADA_RetainRequired` removes all duplicate columns where TADA has
+created a new column with a TADA prefix. It retains all TADA prefixed
+columns as well as other original fields that are either required by
+other TADA functions or are commonly used filters.
+
+```{r}
+WQP_clean_subset <- TADA_RetainRequired(WQP_clean_subset)
+```
+
+## Exploratory analysis
+
+Review unique TADA.ComparableDataIdentifier's
+
+```{r}
+unique(WQP_clean_subset$TADA.ComparableDataIdentifier)
+```
+
+Let's check if any results are above the EPA 304A recommended maximum
+criteria magnitude.
+
+[EPA 2012 Recreational Water Quality Criteria fact sheet](https://www.epa.gov/sites/default/files/2015-10/documents/rec-factsheet-2012.pdf)
+
+
+
+You can find other state, tribal, and EPA 304A criteria in the Criteria
+Search Tool:
+
+
+We will apply EPA recommendation 2 for ESCHERICHIA COLI (criteria
+magnitude of 320 CFU/100mL).
+
+```{r}
+# add column with comparison to criteria mag (excursions)
+WQP_clean_subset <- WQP_clean_subset %>%
+ dplyr::mutate(meets_criteria_mag = ifelse(TADA.ResultMeasureValue <= 320, "Yes", "No"))
+
+# review
+WQP_clean_subset_review <- WQP_clean_subset %>%
+ dplyr::select(
+ MonitoringLocationIdentifier, OrganizationFormalName, ActivityStartDate, TADA.ResultMeasureValue,
+ meets_criteria_mag
+ )
+
+DT::datatable(WQP_clean_subset_review, fillContainer = TRUE)
+```
+
+Generate stats table. Review percentiles. Less than 5% of results fall
+above 10 CFU/100mL, and over 98% of results fall below 265.2 CFU/100mL.
+
+```{r stats}
+WQP_clean_subset_stats <- WQP_clean_subset %>%
+ TADA_Stats()
+```
+
+Generate a scatterplot. Only one result value is above the threshold.
+
+```{r}
+TADA_Scatterplot(WQP_clean_subset, id_cols = "TADA.ComparableDataIdentifier") %>%
+ plotly::add_lines(
+ y = 320,
+ x = c(min(WQP_clean_subset$ActivityStartDate), max(WQP_clean_subset$ActivityStartDate)),
+ inherit = FALSE,
+ showlegend = FALSE,
+ line = list(color = "red"),
+ hoverinfo = "none"
+ )
+```
+
+Generate a histogram.
+
+```{r boxplot and histogram}
+TADA_Histogram(WQP_clean_subset, id_cols = "TADA.ComparableDataIdentifier")
+```
+
+`TADA_Boxplot` can be useful for identifying skewness and percentiles.
+
+```{r boxplot, fig.width=8, fig.height=6, fig.fullwidth=TRUE}
+TADA_Boxplot(WQP_clean_subset, id_cols = "TADA.ComparableDataIdentifier")
+```
+
+## Record end time
+
+```{r analysis time}
+end.time <- Sys.time()
+
+end.time - start.time
+```
+
+Reproducible and Documented
+
+This workflow is reproducible and the decisions at each step are well
+documented. This means that it is easy to go back and review every step,
+understand the decisions that were made, make changes as necessary, and
+run it again.
diff --git a/vignettes/TADACybertown2025_word.Rmd b/vignettes/TADACybertown2025_word.Rmd
new file mode 100644
index 000000000..5b28624ea
--- /dev/null
+++ b/vignettes/TADACybertown2025_word.Rmd
@@ -0,0 +1,569 @@
+---
+title: "TADA Cybertown Workshop June 2025"
+author: "TADA Team"
+date: "`r Sys.Date()`"
+output: word_document
+always_allow_html: true
+description: An introduction to using the EPATADA R package to retrieve, clean, analyze, and visualize WQP data.
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
+```
+
+## Install
+
+First, install and load the remotes package specifying the repo. This is
+needed before installing the *EPATADA* R package because it is only
+available on GitHub.
+
+```{r remotes, results = 'hide', eval = F}
+install.packages("remotes", repos = "http://cran.us.r-project.org")
+library(remotes)
+```
+
+```{css, include = F}
+pre {
+ max-height: 300px;
+ overflow-y: auto;
+}
+
+pre[class] {
+ max-height: 300px;
+}
+```
+
+Next, install (or update) and load the *EPATADA* R package using the
+*remotes* R package. Additional dependency R packages that are used
+within *EPATADA* will be downloaded automatically. You may be prompted
+in the console to update dependency packages that have more recent
+versions available. If you see this prompt, it is recommended to update
+all of them (enter 1 into the console). Our team is actively developing
+*EPATADA*, therefore we highly recommend that you update the package
+(and all of its dependencies) each time you use it.
+
+```{r install, eval = F, results = 'hide'}
+remotes::install_github("USEPA/EPATADA", ref = "develop", dependencies = TRUE)
+library(EPATADA)
+```
+
+```{r install_dev, eval = T, include = F}
+remotes::install_github("USEPA/EPATADA", ref = "cybertown2025", dependencies = TRUE)
+library(EPATADA)
+```
+
+## Record start time
+
+```{r}
+start.time <- Sys.time()
+```
+
+## Retrieve
+
+Query the WQP using TADA_DataRetrieval. TADA_AutoClean is a powerful
+function that runs as part of TADA_DataRetrieval when applyautoclean =
+TRUE. It performs a variety of tasks, for example:
+
+1. creates new "TADA" prefixed columns and capitalizes their
+ contents to reduce case sensitivity issues,
+
+2. converts special characters in value columns,
+
+3. converts latitude and longitude values to numeric,
+
+4. replaces "meters" with "m",
+
+5. replaces deprecated characteristic names with current WQX names,
+
+6. harmonizes result and detection limit units,
+
+7. converts depths to meters, and
+
+8. creates the column TADA.ComparableDataIdentifier by concatenating
+ characteristic name, result sample fraction, method speciation, and
+ result measure unit.
+
+In this example, we will first leverage How's My Waterway (HMW) and the
+ATTAINS geospatial services to find and load an ATTAINS Assessment Unit
+ID and shapefile (only works for polygons for now). We will query the
+ATTAINS geospatial services using the Assessment Unit ID found on HMW
+(see [example
+here](https://mywaterway.epa.gov/waterbody-report/CT_DEP01/CT6400-00-1-L5_01/2022)).
+Then we will use the shapefile as our input for the new aoi_sf query
+option included in TADA_DataRetrieval. This allows us to download WQP
+data within the Assessment Unit (our area of interest/AOI).
+
+```{r TADA_DataRetrieval}
+query.params <- list(
+ where = "assessmentunitidentifier IN ('CT6400-00-1-L5_01')",
+ outFields = "*",
+ f = "geojson"
+)
+
+url <- "https://gispub.epa.gov/arcgis/rest/services/OW/ATTAINS_Assessment/MapServer/2/query?"
+
+poly.response <- httr::GET(url, query = query.params)
+
+poly.geojson <- httr::content(poly.response, as = "text", encoding = "UTF-8")
+
+poly.sf <- sf::st_read(poly.geojson, quiet = TRUE)
+
+WQP_raw <- TADA_DataRetrieval(
+ aoi_sf = poly.sf,
+ applyautoclean = TRUE,
+ ask = FALSE
+)
+
+# # For demo purposes, we pre-downloaded this example data
+# WQP_raw <- cybertown
+```
+
+Remove intermediate variables in R by using 'rm()'.
+
+```{r}
+rm(poly.response, poly.sf, query.params, poly.geojson, url)
+```
+
+## Flag, clean, and visualize
+
+Now, let's use EPATADA functions to review, visualize, and whittle the
+returned WQP data down to include only results that are applicable to
+our water quality analysis and area of interest.
+
+The **TADA_AnalysisDataFilter** function can assist in identifying and
+filtering surface water, groundwater, and sediment results. If you set
+clean = FALSE, this function will categorize and flag (but not remove)
+rows in a new *TADA.UseForAnalysis.Flag* column for review. However, the
+default functionality (clean = TRUE) is to include surface water and
+exclude groundwater and sediment results.
+
+```{r TADA_AnalysisDataFilter}
+WQP_flag <- TADA_AnalysisDataFilter(
+ WQP_raw,
+ clean = FALSE,
+ surface_water = TRUE,
+ ground_water = FALSE,
+ sediment = FALSE
+)
+
+# Review unique flags
+unique(WQP_flag$TADA.UseForAnalysis.Flag)
+
+# Review flagged rows
+WQP_flag_review <- WQP_flag %>%
+ dplyr::filter(TADA.UseForAnalysis.Flag == "No - NA") %>%
+ dplyr::select(c("TADA.UseForAnalysis.Flag", "ActivityMediaName", "ActivityMediaSubdivisionName", "AquiferName", "LocalAqfrName", "ConstructionDateText", "WellDepthMeasure.MeasureValue", "WellDepthMeasure.MeasureUnitCode", "WellHoleDepthMeasure.MeasureValue", "WellHoleDepthMeasure.MeasureUnitCode"))
+
+# Keep rows that are NOT flagged as sediment (keep SW and NA)
+WQP_clean <- WQP_flag %>%
+ dplyr::filter(TADA.UseForAnalysis.Flag != "No - SEDIMENT")
+```
+
+Create an overview map.
+
+```{r TADA_OverviewMap}
+TADA_OverviewMap(WQP_clean)
+```
+
+Let's take a quick look at all unique values in the
+MonitoringLocationIdentifier column and see how many results are
+associated with each.
+
+```{r TADA_FieldValuesTable}
+# use TADA_FieldValuesTable to create a table of the number of results per MonitoringLocationIdentifier
+sites <- TADA_FieldValuesTable(WQP_clean, field = "MonitoringLocationIdentifier")
+
+DT::datatable(sites, fillContainer = TRUE)
+```
+
+Are there sites located within 100 meters of each other?
+
+```{r TADA_FlagCoordinates}
+WQP_clean <- TADA_FindNearbySites(WQP_clean)
+
+TADA_NearbySitesMap(WQP_clean)
+```
+
+Now let's review all unique values in the TADA.ComparableDataIdentifier
+column and see how many results are associated with each.
+TADA.ComparableDataIdentifier concatenates TADA.CharacteristicName,
+TADA.ResultSampleFractionText, TADA.MethodSpeciationName, and
+TADA.ResultMeasure.MeasureUnitCode.
+
+```{r TADA_FieldValuesTable2}
+# use TADA_FieldValuesTable to create a table of the number of results per TADA.ComparableDataIdentifier
+chars <- TADA_FieldValuesTable(WQP_clean, field = "TADA.ComparableDataIdentifier")
+
+DT::datatable(chars, fillContainer = TRUE)
+```
+
+Remove intermediate variables in R by using 'rm()'.
+
+```{r}
+rm(chars, sites, WQP_flag_review, WQP_flag)
+```
+
+Next, let's check if the dataset contains potential duplicate results
+from within a single organization or from within multiple organizations
+(such as when two or more organizations monitor the same location and
+may submit duplicate results).
+
+If you would like to prioritize results from one organization over
+another, this can be done using the org_hierarchy argument in
+`TADA_FindPotentialDuplicatesMultipleOrgs`.
+
+```{r duplicates}
+# find duplicates from single org
+WQP_flag <- TADA_FindPotentialDuplicatesSingleOrg(WQP_clean)
+
+# Review organizations. You can select one to prioritize in TADA_FindPotentialDuplicatesMultipleOrgs
+unique(WQP_flag$OrganizationIdentifier)
+unique(WQP_flag$OrganizationFormalName)
+
+# find duplicates across multiple orgs
+WQP_flag <- TADA_FindPotentialDuplicatesMultipleOrgs(
+ WQP_flag,
+ org_hierarchy = c("CT_DEP01", "USGS-CT", "CTVOLMON", "NALMS")
+)
+```
+
+Let's review the duplicates:
+
+```{r review}
+WQP_flag_review <- WQP_flag %>%
+ dplyr::select(
+ MonitoringLocationName,
+ TADA.MonitoringLocationIdentifier,
+ # TADA.MultipleOrgDuplicate,
+ # TADA.MultipleOrgDupGroupID,
+ # TADA.ResultSelectedMultipleOrgs,
+ TADA.SingleOrgDupGroupID,
+ TADA.SingleOrgDup.Flag,
+ TADA.ComparableDataIdentifier,
+ ResultMeasureValue,
+ TADA.ResultMeasure.MeasureUnitCode,
+ TADA.MonitoringLocationName,
+ TADA.NearbySites.Flag,
+ TADA.NearbySiteGroup,
+ OrganizationIdentifier
+ ) %>%
+ dplyr::filter(TADA.SingleOrgDupGroupID != "Not a duplicate") %>%
+ dplyr::distinct()
+```
+
+We will select to keep only unique samples from
+`TADA_FindPotentialDuplicatesSingleOrg` by filtering for
+TADA.SingleOrgDup.Flag equals "Unique".
+
+There are no multiple org duplicates from
+`TADA_FindPotentialDuplicatesMultipleOrgs` in this example, but if there
+were, duplicates can be removed by filtering for
+TADA.ResultSelectedMultipleOrgs equals "Y".
+
+```{r filter}
+WQP_clean <- WQP_flag %>%
+ dplyr::filter(TADA.SingleOrgDup.Flag == "Unique") %>%
+ dplyr::filter(TADA.ResultSelectedMultipleOrgs == "Y")
+```
+
+Remove intermediate variables in R by using 'rm()'. In the remainder of
+this workshop, we will work with the clean dataset.
+
+```{r}
+rm(WQP_flag, WQP_flag_review)
+```
+
+Censored data are measurements for which the true value is not known,
+but we can estimate the value based on known lower or upper detection
+conditions and limit types. TADA fills missing *TADA.ResultMeasureValue*
+and *TADA.ResultMeasure.MeasureUnitCode* values with values and units
+from *TADA.DetectionQuantitationLimitMeasure.MeasureValue* and
+*TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode*, respectively,
+using the `TADA_AutoClean` function.
+
+The TADA package currently has functions that summarize censored data
+incidence in the dataset and perform simple substitutions of censored
+data values, including x times the detection limit and random selection
+of a value between 0 and the detection limit. The user may specify the
+methods used for non-detects and over-detects separately in the input to
+the `TADA_SimpleCensoredMethods` function. The next step we take in this
+example is to perform simple conversions to the censored data in the
+dataset: we keep over-detects as is (no conversion made) and convert
+non-detect values to 0.5 times the detection limit (half the detection
+limit).
+
+```{r censored}
+WQP_clean <- TADA_SimpleCensoredMethods(
+ WQP_clean,
+ nd_method = "multiplier",
+ nd_multiplier = 0.5,
+ od_method = "as-is",
+ od_multiplier = "null"
+)
+```
+
+`TADA_AutoFilter` removes rows where the result value is not numeric to
+prepare a dataframe for quantitative analyses. Specifically, this
+function removes rows with "Text" and "NA - Not Available" in the
+TADA.ResultMeasureValueDataTypes.Flag column, or NA in the
+TADA.ResultMeasureValue column. In addition, this function removes
+results with QA/QC ActivityTypeCode's. This function also removes any
+columns not required for TADA workflow where all values are equal to NA.
+
+```{r autofilter}
+WQP_clean <- TADA_AutoFilter(WQP_clean)
+```
+
+TADA_RunKeyFlagFunctions is a shortcut function to run important TADA
+flagging functions. See ?function documentation for TADA_FlagResultUnit,
+TADA_FlagFraction, TADA_FindQCActivities, TADA_FlagMeasureQualifierCode,
+and TADA_FlagSpeciation for more information.
+
+```{r TADA_RunKeyFlagFunctions}
+WQP_clean <- TADA_RunKeyFlagFunctions(
+ WQP_clean,
+ clean = TRUE
+)
+```
+
+Another set of TADA flagging functions, `TADA_FlagAboveThreshold` and
+`TADA_FlagBelowThreshold`, can be used to check results against national
+lower and upper thresholds. For these, we will set clean = FALSE and
+flaggedonly = TRUE so that it returns only flagged results in the review
+dataframe returned. We will keep these in our "clean" dataframe for now.
+
+```{r thresholds}
+WQP_flag_reviewabove <- TADA_FlagAboveThreshold(WQP_clean, clean = FALSE, flaggedonly = TRUE)
+
+WQP_flag_reviewbelow <- TADA_FlagBelowThreshold(WQP_clean, clean = FALSE, flaggedonly = TRUE)
+```
+
+Remove intermediate variables.
+
+```{r}
+rm(WQP_flag_reviewabove, WQP_flag_reviewbelow)
+```
+
+Let's take another look at all unique values in the
+TADA.ComparableDataIdentifier column and see how many results are
+associated with each. TADA.ComparableDataIdentifier concatenates
+TADA.CharacteristicName, TADA.ResultSampleFractionText,
+TADA.MethodSpeciationName, and TADA.ResultMeasure.MeasureUnitCode.
+
+```{r TADA_FieldValuesTable3}
+# use TADA_FieldValuesTable to create a table of the number of results per TADA.ComparableDataIdentifier
+chars <- TADA_FieldValuesTable(WQP_clean, field = "TADA.ComparableDataIdentifier")
+
+chars_before <- unique(WQP_clean$TADA.ComparableDataIdentifier)
+
+DT::datatable(chars, fillContainer = TRUE)
+```
+
+Scroll through the table and check to see if there are any synonyms. It may
+be possible that some of these can be automatically harmonized using
+`TADA_HarmonizeSynonyms` so their results can be directly compared.
+
+Let's give it a try.
+
+```{r}
+WQP_clean <- TADA_HarmonizeSynonyms(WQP_clean)
+```
+
+How many unique TADA.ComparableDataIdentifier's do we have now? In this
+example, there were no synonyms.
+
+```{r}
+chars_after <- unique(WQP_clean$TADA.ComparableDataIdentifier)
+```
+
+Remove intermediate variables.
+
+```{r}
+rm(chars_before, chars_after)
+```
+
+Create a pie chart.
+
+```{r}
+TADA_FieldValuesPie(WQP_clean, field = "TADA.CharacteristicName")
+```
+
+## Select characteristic
+
+Let's filter the data and focus on one characteristic of interest.
+
+```{r}
+# Select characteristics of interest
+WQP_clean_subset <- WQP_clean %>%
+ dplyr::filter(TADA.CharacteristicName %in% "ESCHERICHIA COLI")
+```
+
+Remove intermediate variables. We will focus on the subset from now on.
+
+```{r}
+rm(WQP_clean, chars)
+```
+
+## Integrate ATTAINS and map
+
+In this section, we will associate geospatial data from **ATTAINS** with
+the **WQP** data, and filter the dataset to retain only results that
+were collected in specified Assessment Unit(s). We can also generate a
+new table to give us some information about the individual monitoring
+locations within the assessment unit(s).
+
+- TADA_GetATTAINS() automates matching of WQP monitoring locations
+ with ATTAINS assessment units that fall within (intersect) the same
+ NHDPlus catchment
+ ([details](https://usepa.github.io/EPATADA/articles/TADAModule2.html))
+- The function uses high resolution NHDPlus catchments by default
+ because 80% of state submitted assessment units in ATTAINS were
+ developed based on high res NHD; users can select med-res if
+ applicable to their use case
+
+```{r Data Retrieval - Geospatial}
+WQP_clean_subset_spatial <- TADA_GetATTAINS(
+ WQP_clean_subset,
+ fill_catchments = FALSE,
+ return_sf = TRUE,
+ return_nearest = TRUE
+)
+
+# Adds ATTAINS info to df
+WQP_clean_subset <- WQP_clean_subset_spatial$TADA_with_ATTAINS
+```
+
+View catchments and assessment units on map
+
+```{r TADA_ViewATTAINS}
+ATTAINS_map <- TADA_ViewATTAINS(WQP_clean_subset_spatial)
+
+ATTAINS_map
+```
+
+Remove intermediate variables:
+
+```{r}
+rm(ATTAINS_map)
+```
+
+Create table of monitoring location identifiers and AUs.
+
+```{r}
+ML_AU_crosswalk <- WQP_clean_subset %>%
+ dplyr::select(TADA.MonitoringLocationIdentifier, ATTAINS.assessmentunitidentifier, ATTAINS.assessmentunitname, TADA.CharacteristicName) %>%
+ dplyr::distinct()
+```
+
+Remove intermediate variables. Let's keep going with WQP_clean_subset.
+
+```{r}
+rm(ML_AU_crosswalk, WQP_clean_subset_spatial)
+```
+
+`TADA_RetainRequired` removes all duplicate columns where TADA has
+created a new column with a TADA prefix. It retains all TADA prefixed
+columns as well as other original fields that are either required by
+other TADA functions or are commonly used filters.
+
+```{r}
+WQP_clean_subset <- TADA_RetainRequired(WQP_clean_subset)
+```
+
+## Exploratory analysis
+
+Review unique TADA.ComparableDataIdentifier's
+
+```{r}
+unique(WQP_clean_subset$TADA.ComparableDataIdentifier)
+```
+
+Let's check if any results are above the EPA 304A recommended maximum
+criteria magnitude.
+
+[EPA 2012 Recreational Water Quality Criteria fact sheet](https://www.epa.gov/sites/default/files/2015-10/documents/rec-factsheet-2012.pdf)
+
+
+
+You can find other state, tribal, and EPA 304A criteria in the Criteria
+Search Tool:
+
+
+We will apply EPA recommendation 2 for ESCHERICHIA COLI (criteria
+magnitude of 320 CFU/100mL).
+
+```{r}
+# add column with comparison to criteria mag (excursions)
+WQP_clean_subset <- WQP_clean_subset %>%
+ dplyr::mutate(meets_criteria_mag = ifelse(TADA.ResultMeasureValue <= 320, "Yes", "No"))
+
+# review
+WQP_clean_subset_review <- WQP_clean_subset %>%
+ dplyr::select(
+ MonitoringLocationIdentifier, OrganizationFormalName, ActivityStartDate, TADA.ResultMeasureValue,
+ meets_criteria_mag
+ )
+
+DT::datatable(WQP_clean_subset_review, fillContainer = TRUE)
+```
+
+Generate stats table. Review percentiles. Less than 5% of results fall
+above 10 CFU/100mL, and over 98% of results fall below 265.2 CFU/100mL.
+
+```{r stats}
+WQP_clean_subset_stats <- WQP_clean_subset %>%
+ TADA_Stats()
+```
+
+Generate a scatterplot. Only one result value is above the threshold.
+
+```{r}
+TADA_Scatterplot(WQP_clean_subset, id_cols = "TADA.ComparableDataIdentifier") %>%
+ plotly::add_lines(
+ y = 320,
+ x = c(min(WQP_clean_subset$ActivityStartDate), max(WQP_clean_subset$ActivityStartDate)),
+ inherit = FALSE,
+ showlegend = FALSE,
+ line = list(color = "red"),
+ hoverinfo = "none"
+ )
+```
+
+Generate a histogram.
+
+```{r boxplot and histogram}
+TADA_Histogram(WQP_clean_subset, id_cols = "TADA.ComparableDataIdentifier")
+```
+
+`TADA_Boxplot` can be useful for identifying skewness and percentiles.
+
+```{r boxplot, fig.width=8, fig.height=6, fig.fullwidth=TRUE}
+TADA_Boxplot(WQP_clean_subset, id_cols = "TADA.ComparableDataIdentifier")
+```
+
+## Record end time
+
+```{r analysis time}
+end.time <- Sys.time()
+
+end.time - start.time
+```
+
+Reproducible and Documented
+
+This workflow is reproducible and the decisions at each step are well
+documented. This means that it is easy to go back and review every step,
+understand the decisions that were made, make changes as necessary, and
+run it again.
diff --git a/vignettes/TADAModule1.Rmd b/vignettes/TADAModule1.Rmd
index 0a1e66f75..0f1c08e3c 100644
--- a/vignettes/TADAModule1.Rmd
+++ b/vignettes/TADAModule1.Rmd
@@ -79,7 +79,7 @@ remotes::install_github("USEPA/EPATADA",
```{r dev_install, results = 'hide', include = F}
remotes::install_github("USEPA/EPATADA",
- ref = "594-tada_insertbreak-bug-report",
+ ref = "cybertown2025",
dependencies = TRUE
)
```
diff --git a/vignettes/TADAModule1_AdvancedTraining.Rmd b/vignettes/TADAModule1_AdvancedTraining.Rmd
index cb22bcfc7..59195123f 100644
--- a/vignettes/TADAModule1_AdvancedTraining.Rmd
+++ b/vignettes/TADAModule1_AdvancedTraining.Rmd
@@ -110,10 +110,10 @@ the `ref` input in `install_github` (see code chunk below). This
functionality is mainly only useful to TADA package
developers/contributors.
-```{r install_TADA_dev, eval = F, include = F}
+```{r install_TADA_dev, include = F}
# helps with development (knit and checks), this chunk should not appear on the pkgdown website
remotes::install_github("USEPA/EPATADA",
- ref = "594-tada_insertbreak-bug-report",
+ ref = "cybertown2025",
dependencies = TRUE
)
```
diff --git a/vignettes/TADAModule1_BeginnerTraining.Rmd b/vignettes/TADAModule1_BeginnerTraining.Rmd
index 189d49aff..673fb4cdb 100644
--- a/vignettes/TADAModule1_BeginnerTraining.Rmd
+++ b/vignettes/TADAModule1_BeginnerTraining.Rmd
@@ -93,7 +93,7 @@ remotes::install_github("USEPA/EPATADA",
```{r install_TADA_developer, eval = F, include = F}
remotes::install_github("USEPA/EPATADA",
- ref = "594-tada_insertbreak-bug-report",
+ ref = "cybertown2025",
dependencies = TRUE
)
@@ -321,7 +321,7 @@ FieldValues_AnalysisFlag <- TADA_FieldValuesTable(R5Profile, field = "TADA.UseFo
analysis**
```{r question3}
-# Filter to flag only surface water results for use in analysis
+# Select only surface water results for use in analysis
n_sur_water <- FieldValues_AnalysisFlag %>%
dplyr::filter(Value == "Yes - SURFACE WATER") %>%
diff --git a/vignettes/TADAModule2.Rmd b/vignettes/TADAModule2.Rmd
index 949b7bb0f..6d580c03c 100644
--- a/vignettes/TADAModule2.Rmd
+++ b/vignettes/TADAModule2.Rmd
@@ -81,7 +81,7 @@ remotes::install_github("USEPA/EPATADA",
```{r dev_install, results = 'hide', include = F}
remotes::install_github("USEPA/EPATADA",
- ref = "594-tada_insertbreak-bug-report",
+ ref = "cybertown2025",
dependencies = TRUE
)
```
@@ -294,7 +294,7 @@ information. In these instances, the user can optionally fill in
catchment information from the NHD by entering `fill_catchments = TRUE`:
```{r}
-TADA_with_ATTAINS_filled <- TADA_GetATTAINS(TADA_dataframe, fill_catchments = TRUE, return_sf = TRUE, return_neares = FALSE)
+TADA_with_ATTAINS_filled <- TADA_GetATTAINS(TADA_dataframe, fill_catchments = TRUE, return_sf = TRUE, return_nearest = FALSE)
```
When `fill_catchments = TRUE`, the returned list splits observations
@@ -448,7 +448,7 @@ daily value (DV) water data. Like `TADA_listNWIS()`, users can query
data based on an area of interest (i.e., an sf object), statecode, or
specific sites.
-Additionally, users must specify the parameter codes and statstics they
+Additionally, users must specify the parameter codes and statistics they
want to download and a date range. A list of all available parameters
can be found at:
[https://help.waterdata.usgs.gov/parameter_cd?group_cd=%](https://help.waterdata.usgs.gov/parameter_cd?group_cd=%){.uri}
diff --git a/vignettes/TADAModule3_PartA.Rmd b/vignettes/TADAModule3_PartA.Rmd
index c56a3394c..a8a79cb12 100644
--- a/vignettes/TADAModule3_PartA.Rmd
+++ b/vignettes/TADAModule3_PartA.Rmd
@@ -65,7 +65,7 @@ update all of them (enter 1 into the console).
```{r install_TADA_dev, include = F}
# helps with development (knit and checks), this chunk should not appear on the pkgdown website
remotes::install_github("USEPA/EPATADA",
- ref = "594-tada_insertbreak-bug-report",
+ ref = "cybertown2025",
dependencies = TRUE
)
```
diff --git a/vignettes/TADAWaterSciConWorkshopDemo.Rmd b/vignettes/TADAWaterSciConWorkshopDemo.Rmd
index 77cabaa75..de5afbd9d 100644
--- a/vignettes/TADAWaterSciConWorkshopDemo.Rmd
+++ b/vignettes/TADAWaterSciConWorkshopDemo.Rmd
@@ -75,7 +75,7 @@ remotes::install_github("USEPA/EPATADA",
```{r install_TADA_developer, include = FALSE}
remotes::install_github("USEPA/EPATADA",
- ref = "594-tada_insertbreak-bug-report",
+ ref = "cybertown2025",
dependencies = TRUE
)
```
diff --git a/vignettes/images/bacteria.png b/vignettes/images/bacteria.png
new file mode 100644
index 000000000..cd065b37e
Binary files /dev/null and b/vignettes/images/bacteria.png differ