3 changes: 2 additions & 1 deletion DESCRIPTION
@@ -30,7 +30,7 @@ Depends:
Suggests:
rmarkdown,
knitr,
testthat,
testthat (>= 3.0.0),
usethis,
devtools,
pkgdown,
@@ -39,3 +39,4 @@ Suggests:
spelling
VignetteBuilder: knitr, rmarkdown
Language: en-US
Config/testthat/edition: 3
7 changes: 6 additions & 1 deletion R/DataDiscoveryRetrieval.R
@@ -134,20 +134,25 @@ TADAdataRetrieval <- function(statecode = "null",
#' your code. This URL lets you return to the WQP query page with all your
#' original data filters.
#'
#' @param webservice WQP Web Service URL
#' @param webservice WQP Web Service URL, entered as a quoted string ("url")
#'
#' @return WQP Full Physical Chemical Results Data Profile
#'
#' @export
#'

readWQPwebservice <- function(webservice) {
  # consider the function dataRetrieval::getWebServiceData
  # read in csv from the WQP web service
  if (grepl("zip=yes", webservice)) {
    webservice <- stringr::str_replace(webservice, "zip=yes", "zip=no")
    return(data.table::fread(toString(webservice)))
    # update when we switch to WQX 3.0
    # return(autoclean(data.table::fread(toString(webservice))))
  } else {
    return(data.table::fread(webservice))
    # update when we switch to WQX 3.0
    # return(autoclean(data.table::fread(webservice)))
  }
}

4 changes: 2 additions & 2 deletions _pkgdown.yml
@@ -1,4 +1,4 @@
url: https://pkgdown.r-lib.org
template:
  bootstrap: 5
  bootswatch: cerulean
2 changes: 1 addition & 1 deletion man/readWQPwebservice.Rd


12 changes: 12 additions & 0 deletions tests/testthat.R
@@ -0,0 +1,12 @@
# This file is part of the standard setup for testthat.
# It is recommended that you do not modify it.
#
# Where should you do additional test configuration?
# Learn more about the roles of various files in:
# * https://r-pkgs.org/tests.html
# * https://testthat.r-lib.org/reference/test_package.html#special-files

library(testthat)
library(TADA)

test_check("TADA")
97 changes: 63 additions & 34 deletions vignettes/WQPDataHarmonization.Rmd
@@ -10,16 +10,26 @@ vignette: >
editor_options:
chunk_output_type: console
---

```{r setup, include = FALSE}
# This code is used to build (knit) an HTML vignette
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
echo = TRUE
)
```

## Install and load packages

Install and load dependency packages. Current versions of TADA and dataRetrieval can be installed from GitHub instead of CRAN.

```{r, echo = FALSE}
list.of.packages <- c(
  "plyr", "dplyr", "ggplot2", "RColorBrewer", "Rcpp", "devtools",
  "data.table", "grDevices", "magrittr", "stringr", "testthat", "usethis",
  "utils", "stats", "rmarkdown", "knitr", "remotes"
)
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
if (length(new.packages)) install.packages(new.packages)
```

```{r}
library(remotes)
library(dplyr)
library(ggplot2)
@@ -37,27 +37,16 @@ library(stats)
library(rmarkdown)
library(knitr)
library(devtools)
```

## Load TADA and dataRetrieval

You can install and load the most recent versions from GitHub by running:

```{r}
remotes::install_github("USGS-R/dataRetrieval", dependencies = TRUE)
remotes::install_github("USEPA/TADA")
library(dataRetrieval)
library(TADA)
```


## Retrieve WQP data

WQP data is retrieved and processed for compatibility with TADA. The TADAdataRetrieval function builds on the USGS dataRetrieval package functions. It joins three WQP profiles (the station, narrow, and phys/chem profiles), converts all values in the Characteristic, Speciation, Fraction, and Unit fields to uppercase, removes true duplicates, removes data for all non-water media types, and cleans results containing special characters.

This function uses the same inputs as the dataRetrieval `readWQPdata` function. `readWQPdata` does not restrict the characteristics pulled from WQP. You may specify the desired characteristics by using, for instance: `characteristicName = "pH"`.
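As a sketch, a single-characteristic pull might look like the following (this example is hypothetical; it reuses the Utah state code from the query later in this vignette):

```r
# Hypothetical single-characteristic query: TADAdataRetrieval accepts the
# same arguments as dataRetrieval::readWQPdata, so characteristicName
# restricts the pull to pH results only.
pH_profile <- TADAdataRetrieval(statecode = "UT",
                                characteristicName = "pH")
```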
@@ -73,14 +72,25 @@ Here is more information on the dataRetrieval R Package:
Water Quality Portal downloads have the same columns each time, but be aware that data are uploaded to the Water Quality Portal by individual organizations, which may or may not follow the same conventions. Data and metadata quality are not guaranteed! Make sure to carefully explore any data and make conservative quality assurance decisions where information is limited.

In this vignette, we will walk through some of the things to look for when deciding to use different organizations data from the WQP.

```{r, echo = FALSE}
# You can edit this to define your own WQP query inputs below
TADAProfile <- TADAdataRetrieval(statecode = "UT",
characteristicName = c("Ammonia", "Nitrate", "Nitrogen"),
startDate = "01-01-2021")

# Alternatively, use the data.table::fread function to read in a web service call for any WQP profile.
new_fullphyschem <- data.table::fread("https://www.waterqualitydata.us/data/Result/search?countrycode=US&statecode=US%3A49&siteid=UTAHDWQ_WQX-4925610&startDateLo=01-01-2015&startDateHi=12-31-2016&mimeType=csv&zip=no&sorted=yes&dataProfile=fullPhysChem&providers=NWIS&providers=STEWARDS&providers=STORET")
```

Review all column names in the TADA Profile:

```{r}
colnames(new_fullphyschem)
```

## Depth unit conversions

Converts depth units to a consistent unit.

**ActivityDepthHeightMeasure.MeasureValue** provides depth information. This column is crucial for lake data but less so for river data.
@@ -99,11 +109,10 @@ See additional function documentation for additional function options by enterin
```{r}
# converts all depth profile data to meters
TADAProfileClean1 <- DepthProfileData(TADAProfile, unit = "m", transform = TRUE)

colnames(TADAProfileClean1)
```

## Result unit conversions

Converts all results to WQX target units. WQX target units are pulled from the MeasureUnit domain table: https://cdx2.epa.gov/wqx/download/DomainValues/MeasureUnit.CSV

See the function documentation for additional options by entering the following
@@ -115,8 +124,9 @@ code in the console:
TADAProfileClean2 <- WQXTargetUnits(TADAProfileClean1, transform = TRUE)
```

## Statistically aggregated data

Checks for and removes statistically aggregated high frequency (i.e., continuous) data, if present.

The Water Quality Portal (WQP) is not designed to store high-frequency sensor data. However, sometimes data providers choose to aggregate their continuous data and submit it to WQP as one value. This type of data may not be suitable for integration with discrete water quality data for assessments. Therefore, this function uses metadata submitted by data providers to flag rows with aggregated continuous data. This is done by flagging results where ResultDetectionConditionText = "Reported in Raw Data (attached)". When clean = TRUE, rows with aggregated continuous data are removed from the dataset and no column will be appended. Default is clean = TRUE.

@@ -128,6 +138,7 @@ TADAProfileClean3 <- AggregatedContinuousData(TADAProfileClean2, clean = TRUE)
```

## WQX QAQC Service Result Flags

Run the following result functions to address invalid method, fraction, speciation, and unit
metadata by characteristic. The default is clean = TRUE, which will remove invalid results.
You can change this to clean = FALSE to flag results, but not remove them.
@@ -137,6 +148,7 @@ See documentation for more details:
?InvalidMethod
?InvalidSpeciation
?InvalidResultUnit
?InvalidFraction

```{r}
TADAProfileClean4 <- InvalidMethod(TADAProfileClean3, clean = TRUE)
TADAProfileClean5 <- InvalidFraction(TADAProfileClean4, clean = TRUE)
@@ -145,6 +157,7 @@ TADAProfileClean7 <- InvalidResultUnit(TADAProfileClean6, clean = TRUE)
```

## WQX national upper and lower thresholds

Run the following code to flag or remove results that are above or below the national
upper and lower bound for each characteristic and unit combination. The default is
clean = TRUE, but you can change this to only flag results if desired. Results will be
@@ -154,20 +167,23 @@ TADAProfileClean8 <- AboveNationalWQXUpperThreshold(TADAProfileClean7, clean = T
TADAProfileClean9 <- BelowNationalWQXUpperThreshold(TADAProfileClean8, clean = TRUE)
```

## Potential duplicates

Sometimes multiple organizations submit the exact same data to the Water Quality Portal (WQP), which can affect water quality analyses and assessments. This function checks for and identifies data that are identical in all fields excluding organization-specific and comment text fields. Each pair or group of potential duplicate rows is flagged with a unique ID. When clean = TRUE, the function retains the first occurrence of each potential duplicate in the dataset. Default is clean = TRUE.

```{r}
TADAProfileClean10 <- PotentialDuplicateRowID(TADAProfileClean9)
```

## Invalid coordinates

This function identifies and flags invalid coordinate data. When clean_outsideUSA = FALSE and clean_imprecise = FALSE, a column titled "TADA.InvalidCoordinates" will be appended with the following flags (if relevant to the dataset). If the latitude is less than zero, the row is flagged "LAT_OutsideUSA". If the longitude is greater than zero AND less than 145, the row is flagged "LONG_OutsideUSA". If the latitude or longitude contains the string "999", the row is flagged as invalid. Finally, precision can be assessed by the number of decimal places in the latitude and longitude provided. If either has no digits to the right of the decimal point, the row is flagged "Imprecise".

```{r}
TADAProfileClean11 <- InvalidCoordinates(TADAProfileClean10, clean_outsideUSA = FALSE, clean_imprecise = FALSE)
```

## Review QAPP information

Check data for an approved QAPP

This function checks to see if there is any information in the column "QAPPApprovedIndicator". Some organizations submit data for this field to indicate if the data produced has an approved Quality Assurance Project Plan (QAPP) or not. In this field, Y indicates yes, N indicates no.
Expand All @@ -177,22 +193,26 @@ This function has two default inputs: clean = TRUE and cleanNA = FALSE. These de
TADAProfileClean12 <- QAPPapproved(TADAProfileClean11, clean = TRUE, cleanNA = FALSE)
```

## QAPPDocAvailable

Check to see if a QAPP document is available.

This function checks data submitted under the "ProjectFileUrl" column to determine if a QAPP document is available to review. When clean = FALSE, a column will be appended to flag results that do have an associated QAPP document URL provided. When clean = TRUE, rows that do not have an associated QAPP document are removed from the dataset and no column will be appended. This function should only be used to remove data if an accompanying QAPP document is required to use data in assessments.

```{r}
TADAProfileClean13 <- QAPPDocAvailable(TADAProfileClean12, clean = FALSE)
```

## Filter data by field

In this section a TADA user will want to review the unique values in specific fields and may choose to remove data with particular values.

To start, review the list of fields and the number of unique values in each field.

```{r}
FilterFields(TADAProfileClean13)
```

@@ -202,44 +222,52 @@ Here is a list of other fields to review:
1. **ResultCommentText** often has details relating to additional QA.
2. **MeasureQualifierCode** contains information about data flags.
3. Other codes may designate suspect data or other flags, which may be described in detail in **ResultLaboratoryCommentText** or another column.

```{r}
FilterFieldReview("ActivityTypeCode", TADAProfileClean13)
```

The ActivityTypeCode field has four unique values -- "Sample-Routine", "Quality Control Sample-Field Replicate", "Field Msr/Obs", and "Quality Control Sample-Field Blank." In this example we want to remove quality control values in the ActivityTypeCode field, therefore, we'll specify that we want to remove the "Quality Control Sample-Field Replicate" and "Quality Control Sample-Field Blank" values in the ActivityTypeCode field.

```{r}
TADAProfileClean14 <- dplyr::filter(TADAProfileClean13, !(ActivityTypeCode %in% c("Quality Control Sample-Field Replicate", "Quality Control Sample-Field Blank")))
```

We've completed our review of the ActivityTypeCode field.

Let's move on to a different field and see if there are any values that we want to remove -- we'll look at the values in the ActivityMediaSubdivisionName field.

```{r}
FilterFieldReview("ActivityMediaSubdivisionName", TADAProfileClean14)
```

The ActivityMediaSubdivisionName field has two unique values, "Surface Water" and "Groundwater." In this example we want to remove the "Groundwater" values.

```{r}
TADAProfileClean15 <- dplyr::filter(TADAProfileClean14, !(ActivityMediaSubdivisionName %in% "Groundwater"))
```

## Filter data by field, subset by parameter

In this section a TADA user will want to select a parameter, review the unique values associated with that parameter in specific fields, and choose to remove particular values.

To start, review the list of parameters in the dataset. (The list is sorted from highest to lowest counts. Only the first few rows are displayed to save space on the page)

```{r}
FilterParList(TADAProfileClean15)
```

Next, select a parameter. Let's explore the fields associated with Nitrogen:

```{r}
FilterParFields(TADAProfileClean15, "NITROGEN")
```

Selecting a parameter generates the list above: the fields associated with the selected parameter and the number of unique values in each field.

Then choose a field from the list. In this example we'll remove certain values from the HydrologicEvent field.

```{r}
FilterParFieldReview("HydrologicEvent", TADAProfileClean15, "NITROGEN")
```

@@ -250,6 +278,7 @@ TADAProfileClean16 <- dplyr::filter(TADAProfileClean15, !(CharacteristicName %in
```

## Transform Characteristic, Speciation, and Unit values to TADA Standards

The HarmonizeRefTable function generates a harmonization reference table that is specific to the input dataset. Users can review how their input data relate to standard TADA values for CharacteristicName, ResultSampleFractionText, MethodSpeciationName, and ResultMeasure.MeasureUnitCode, and they can optionally edit the reference file to meet their needs. The download argument can be used to save the harmonization file to your current working directory when download = TRUE; the default is download = FALSE.
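A minimal sketch of generating the reference table for the cleaned profile from this vignette (assuming the `download` argument described above; the object name is illustrative):

```r
# Generate a dataset-specific harmonization reference table (sketch).
# Set download = TRUE to also write the CSV to your working directory.
HarmonizationRef <- HarmonizeRefTable(TADAProfileClean16, download = FALSE)
```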
