From f778ca9de30253d40d509c03f2000cca266476ae Mon Sep 17 00:00:00 2001 From: Elise H <121896266+ehinman@users.noreply.github.com> Date: Mon, 30 Jan 2023 14:14:07 -0500 Subject: [PATCH 01/10] TADAbigdataRet chunking by site - changed statecode default to "no" to match other inputs - specified startDate and endDate if not populated in input. - Made readWQPsummary a flexible query in case site type or statecode are not included - State code can be used but not default. - removes objects not needed in future computation (save space) - removed temp RDS file creation/reading within package. --- R/DataDiscoveryRetrieval.R | 121 +++++++------------ man/TADABigdataRetrieval.Rd | 2 +- tests/testthat/test-DataDiscoveryRetrieval.R | 10 +- 3 files changed, 56 insertions(+), 77 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index d7b0ce2fb..e2cfbadd5 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -272,54 +272,69 @@ TADAReadWQPWebServices <- function(webservice) { TADABigdataRetrieval <- function(startDate = "null", endDate = "null", - statecode = character(0), + statecode = "null", characteristicName = "null", siteType = "null" ) { + + if(!startDate=="null"){ + startDate_Low = lubridate::ymd(startDate) + startYearLo = lubridate::year(startDate_Low) + }else{ # else: pick a date before which any data are unlikely to be in WQP + startDate = "1800-01-01" + startDate_Low = lubridate::ymd(startDate) + startYearLo = lubridate::year(startDate_Low) + } - startDate_Low = lubridate::ymd(startDate) - startYearLo = lubridate::year(startDate_Low) - - endDate_High = lubridate::ymd(endDate) - startYearHi = lubridate::year(endDate_High) +# Logic: if the input endDate is not null, convert to date and obtain year + # for summary + if(!endDate=="null"){ + endDate_High = lubridate::ymd(endDate) + endYearHi = lubridate::year(endDate_High) + }else{ # Else, if not populated, default to using today's date/year for summary + endDate = 
Sys.Date() + endDate_High = lubridate::ymd(endDate) + endYearHi = lubridate::year(endDate_High) + } + # Create WQPsummary query + WQPquery <- list() if (length(characteristicName)>1) { - characteristicName = list(characteristicName) + WQPquery = c(WQPquery,characteristicName = list(characteristicName)) } else if (characteristicName != "null") { - characteristicName = characteristicName + WQPquery = c(WQPquery,characteristicName = characteristicName) } - if (length(siteType)>1) { - siteType = list(siteType) + WQPquery = c(WQPquery,siteType = list(siteType)) } else if (siteType != "null") { - siteType = siteType + WQPquery = c(WQPquery,siteType = siteType) } - - state_cd_cont = utils::read.csv(file = "inst/extdata/statecode.csv") - - if(length(statecode)>0){ + + if (!statecode=="null") { + state_cd_cont = utils::read.csv(file = "inst/extdata/statecode.csv") statecode = as.character(statecode) state_cd_cont = state_cd_cont%>%filter(STUSAB%in%statecode) + statecd = state_cd_cont$STATE if(nrow(state_cd_cont)==0){stop("State code is not valid. 
Check FIPS state/territory abbreviations.")} + if(length(statecode)>1){ + WQPquery = c(WQPquery, statecode=list(statecd)) + }else{WQPquery = c(WQPquery, statecode=statecd)} } - for(i in seq_len(nrow(state_cd_cont))){ - - state_cd = as.numeric(state_cd_cont$STATE[i]) - state_nm = state_cd_cont$STUSAB[i] + df_summary = dataRetrieval::readWQPsummary(WQPquery) + ## NOTE: if query brings back no results, function returns empty # dataRetrieval profile, not empty summary - df_summary = dataRetrieval::readWQPsummary(statecode = state_cd, - characteristicName = characteristicName, - siteType = siteType) if(nrow(df_summary)>0){ sites = df_summary %>% dplyr::filter(YearSummarized >= startYearLo, - YearSummarized <= startYearHi) + YearSummarized <= endYearHi) + siteid_all = unique(sites$MonitoringLocationIdentifier) + rm(df_summary) # save some space if(length(siteid_all) > 0) { - #print(paste0("Grabbing ",state_nm," data from ",length(siteid_all)," sites.")) + rm(sites) # save some space l=length(siteid_all) #len(sites) maxsites=100 #max number of sites pulled per WQP query #may want to consider using the total number of records in a given @@ -371,60 +386,16 @@ TADABigdataRetrieval <- function(startDate = "null", df = dplyr::bind_rows(df, joins) } }else{ - joins = data.frame() - # print(paste0(state_nm, " returned no data.")) - } - - if(nrow(df) > 0){ - - ##### - #need to edit below if temporary rds files do not go away - #may be able to delete below - #https://stackoverflow.com/questions/47626331/saving-and-retrieving-temp-files-in-r-packages - ##### - - #original - #saveRDS(df_state, file = paste0(state_nm, "_raw_data.rds")) - - tempfilename = paste0(state_nm, "_raw_data.rds") - file.path(tempdir(), saveRDS(df, file = paste0("inst/tempdata/", tempfilename))) - - } - } #else{print(paste0(state_nm, " had no data."))} - } - all_data <- data.frame() - stdir = list.files("inst/tempdata/") - - for(k in 1:length(stdir)){ - path = paste0("inst/tempdata/",stdir[k]) - allstates_df <- 
tryCatch({ - ##### - #need to edit line below if rds files do not go away - ##### - - #original below - #readRDS(paste0(state, "_raw_data.rds")) - - readRDS(path) - }) - unlink(path) - - if(nrow(allstates_df) > 0){ - allstates_df$ResultMeasureValue = as.character(allstates_df$ResultMeasureValue) - allstates_df$HorizontalAccuracyMeasure.MeasureValue = as.character(allstates_df$HorizontalAccuracyMeasure.MeasureValue) - allstates_df$ActivityDepthHeightMeasure.MeasureValue = as.character(allstates_df$ActivityDepthHeightMeasure.MeasureValue) - allstates_df$DetectionQuantitationLimitMeasure.MeasureValue = as.character(allstates_df$DetectionQuantitationLimitMeasure.MeasureValue) - all_data <- dplyr::bind_rows(all_data, allstates_df) + warning("Query returned no data. Function returns an empty dataframe.") + return(sites) } + }else{ + warning("Query returned no data. Function returns an empty dataframe.") + return(df_summary) +} - } - - # # Do not need if date input works in dataRetrieval functions - # finalprofile = all_data %>% - # dplyr::filter(ActivityStartDate <= endDate, - # ActivityStartDate >= startDate) - finalprofile = autoclean(all_data) + finalprofile = autoclean(df) #not sure if above is working correctly, thousands of "duplicated" rows are removed # you will still need to filter on activity media subdivision now diff --git a/man/TADABigdataRetrieval.Rd b/man/TADABigdataRetrieval.Rd index a4a7d3192..af77a20ef 100644 --- a/man/TADABigdataRetrieval.Rd +++ b/man/TADABigdataRetrieval.Rd @@ -7,7 +7,7 @@ TADABigdataRetrieval( startDate = "null", endDate = "null", - statecode = character(0), + statecode = "null", characteristicName = "null", siteType = "null" ) diff --git a/tests/testthat/test-DataDiscoveryRetrieval.R b/tests/testthat/test-DataDiscoveryRetrieval.R index ba46d26fc..2e1bd9cc3 100644 --- a/tests/testthat/test-DataDiscoveryRetrieval.R +++ b/tests/testthat/test-DataDiscoveryRetrieval.R @@ -135,6 +135,14 @@ test_that("TADAdataRetrieval", { 
expect_true(any(check_autoclean_meters_works$ActivityDepthHeightMeasure.MeasureUnitCode!="meters")) }) +# Testing that regular and big data retrieval return the same number of rows on an identical query. +test_that("Reg&BigdataRetrieval",{ + big <- TADABigdataRetrieval(characteristicName = "Algae, substrate rock/bank cover (choice list)", siteType = "Stream") + reg <- TADAdataRetrieval(characteristicName = "Algae, substrate rock/bank cover (choice list)", siteType = "Stream") + + expect_equal(nrow(big),nrow(reg)) +}) + # Testing that the JoinWQPProfiles() function in DataDiscoveryRetrieval.R # has the expected number of columns after joining the full physical chemical @@ -195,4 +203,4 @@ test_that("JoinWQPProfile", { "WellHoleDepthMeasure.MeasureUnitCode", "MethodSpecificationName") %in% names(join))) -}) \ No newline at end of file +}) From 649e3e739683662a93b2a3f080c4b95c42de47fd Mon Sep 17 00:00:00 2001 From: cristinamullin Date: Mon, 30 Jan 2023 17:02:51 -0500 Subject: [PATCH 02/10] updated examples removed bad example --- R/DataDiscoveryRetrieval.R | 7 +++++-- man/TADABigdataRetrieval.Rd | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index e2cfbadd5..959129eda 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -248,6 +248,10 @@ TADAReadWQPWebServices <- function(webservice) { #' censored data later on (i.e., nondetections) #' #' See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information. 
+#' +#' Reference: +#' Some code for this function was adapted from USGS (Author: Aliesha Krall): +#' https://waterdata.usgs.gov/blog/large_sample_pull/ #' #' @param startDate Start Date YYYY-MM-DD format, for example, "1995-01-01" #' @param endDate end date in YYYY-MM-DD format, for example, "2020-12-31" @@ -264,8 +268,7 @@ TADAReadWQPWebServices <- function(webservice) { #' tada2 <- TADABigdataRetrieval(startDate = "2019-01-01", endDate = "2021-12-31", characteristicName = "Temperature, water", siteType = "Stream") #' #' tada3 <- TADABigdataRetrieval(characteristicName = "Phosphorus") -#' -#' tada3 <- TADABigdataRetrieval(statecode = "CT") +#' #' } #' diff --git a/man/TADABigdataRetrieval.Rd b/man/TADABigdataRetrieval.Rd index af77a20ef..b66e0e3fd 100644 --- a/man/TADABigdataRetrieval.Rd +++ b/man/TADABigdataRetrieval.Rd @@ -59,6 +59,10 @@ and to provide information about the result values that is needed to address censored data later on (i.e., nondetections) See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information. 
+ +Reference: +Some code for this function was adapted from USGS (Author: Aliesha Krall): +https://waterdata.usgs.gov/blog/large_sample_pull/ } \examples{ \dontrun{ @@ -66,7 +70,6 @@ tada2 <- TADABigdataRetrieval(startDate = "2019-01-01", endDate = "2021-12-31", tada3 <- TADABigdataRetrieval(characteristicName = "Phosphorus") -tada3 <- TADABigdataRetrieval(statecode = "CT") } } From 31282589c4505617596f06b94c3f6c9f3f42a3a2 Mon Sep 17 00:00:00 2001 From: cristinamullin Date: Tue, 31 Jan 2023 10:24:47 -0500 Subject: [PATCH 03/10] documentation fix --- R/DataDiscoveryRetrieval.R | 11 ++++++----- man/TADABigdataRetrieval.Rd | 7 +++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 959129eda..c251ce134 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -245,13 +245,14 @@ TADAReadWQPWebServices <- function(webservice) { #' "TADA.DetectionLimitMeasureValue.Flag" are created to track and changes made #' to the "ResultMeasureValue" and "DetectionLimitMeasureValue" columns; #' and to provide information about the result values that is needed to address -#' censored data later on (i.e., nondetections) -#' +#' censored data later on (i.e., nondetections) +#' +#' Some code for this function was adapted from this USGS Blog (Author: Aliesha Krall) +#' \href{https://waterdata.usgs.gov/blog/large_sample_pull/}{Large Sample Pull} +#' #' See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information. 
#' -#' Reference: -#' Some code for this function was adapted from USGS (Author: Aliesha Krall): -#' https://waterdata.usgs.gov/blog/large_sample_pull/ +#' #' #' @param startDate Start Date YYYY-MM-DD format, for example, "1995-01-01" #' @param endDate end date in YYYY-MM-DD format, for example, "2020-12-31" diff --git a/man/TADABigdataRetrieval.Rd b/man/TADABigdataRetrieval.Rd index b66e0e3fd..4a25eed02 100644 --- a/man/TADABigdataRetrieval.Rd +++ b/man/TADABigdataRetrieval.Rd @@ -58,11 +58,10 @@ to the "ResultMeasureValue" and "DetectionLimitMeasureValue" columns; and to provide information about the result values that is needed to address censored data later on (i.e., nondetections) -See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information. +Some code for this function was adapted from this USGS Blog (Author: Aliesha Krall) +\href{https://waterdata.usgs.gov/blog/large_sample_pull/}{Large Sample Pull} -Reference: -Some code for this function was adapted from USGS (Author: Aliesha Krall): -https://waterdata.usgs.gov/blog/large_sample_pull/ +See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information. } \examples{ \dontrun{ From 6aced185f011895c8144d27fe6c8668504b06f62 Mon Sep 17 00:00:00 2001 From: cristinamullin Date: Tue, 31 Jan 2023 10:26:50 -0500 Subject: [PATCH 04/10] update vignette --- vignettes/WQPDataHarmonization.Rmd | 91 ++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 31 deletions(-) diff --git a/vignettes/WQPDataHarmonization.Rmd b/vignettes/WQPDataHarmonization.Rmd index 10ce6eed5..8cad07416 100644 --- a/vignettes/WQPDataHarmonization.Rmd +++ b/vignettes/WQPDataHarmonization.Rmd @@ -30,7 +30,7 @@ multiple organizations. To install TADA, currently you need to install from GitHub using remotes (shown) or devtools. dataRetrieval will be downloaded from CRAN, but the -development version can be downloaded directly from GitHub (un-comment). 
+development version can be downloaded directly from GitHub (uncomment). The following code will also install any packages you do not have, and load all packages required to run this vignette into your R session. @@ -53,8 +53,8 @@ dataRetrieval from GitHub library(remotes) ``` -Uncomment the lines below to install latest version of TADA and -dataRetrieval from GitHub. +Uncomment the lines below to install latest version of TADA, +dataRetrieval, and ggplot2 from GitHub. ```{r, results = 'hide', message = FALSE, warning = FALSE} # remotes::install_github("USGS-R/dataRetrieval", dependencies=TRUE) @@ -62,6 +62,9 @@ dataRetrieval from GitHub. # remotes::install_github("hadley/ggplot2", dependencies=TRUE) remotes::install_github("USEPA/TADA", dependencies=TRUE) + +# if you experience any issues installing TADA, try uncommenting and running the line below before the install +# options(download.file.method = "wininet") ``` Load the required libraries to run this vignette into your R session @@ -216,9 +219,9 @@ Option 1: Use the TADAdataRetrieval function. ```{r} # uncomment below if you would like to review differences between the profiles you would get using readWQPdata vs. TADAdataRetrieval. The profiles are different because TADAdataRetrieval automatically joins in other data from different WQP profiles, and does some additional data cleaning as part of the data retrieval process. 
-#dataRetrievalProfile <- dataRetrieval::readWQPdata(statecode = "UT", characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), startDate = "01-01-2021", ignore_attributes = TRUE) +# dataRetrievalProfile <- dataRetrieval::readWQPdata(statecode = "UT", characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), startDate = "01-01-2021", ignore_attributes = TRUE) -#You can edit this to define your own WQP query inputs below +# You can edit the line below to define your own WQP query inputs TADAProfile <- TADAdataRetrieval(statecode = "UT", characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), startDate = "10-01-2020") ``` @@ -243,8 +246,8 @@ single TADA compatible dataframe as the output. For large dataframes, that can save a lot of time and ultimately reduce the complexity of subsequent data processing. Using this function, you will be able to download all data available from all sites in the contiguous United -States that is available for the time period, characteristicName, statecode, -and siteType requested. +States that is available for the time period, characteristicName, +statecode, and siteType requested. See ?TADABigdataRetrieval for more details. WARNING, this can take multiple HOURS to run. The total run time depends on your query inputs. @@ -259,7 +262,6 @@ multiple HOURS to run. The total run time depends on your query inputs. 
# # Phosphorus <- TADABigdataRetrieval(characteristicName = "Phosphorus") # -# CT <- TADABigdataRetrieval(statecode = "CT") ``` Review all column names in the TADA Profile @@ -274,24 +276,27 @@ TADAProfile_CharSummary <- SummarizeCharacteristics(TADAProfile) Review station locations ```{r} -#create a map of the world -map() +# create new blank window for figure +grDevices::dev.new(width=20, height=10, unit="in") + +# create a map of the world +maps::map() # uncomment below to create USA base map instead -#map("usa") +#maps::map("usa") # uncomment below to add state outlines to USA map -#map("state", add=TRUE) +#maps::map("state", add=TRUE) # uncomment below to add county outlines to USA map -#map("county", add=TRUE) +#maps::map("county", add=TRUE) # uncomment below to draw map of a specific state instead, with county lines # maps::map('county', 'utah') # draw the site locations onto the map -points(TADAProfile$LongitudeMeasure, TADAProfile$LatitudeMeasure, col="red", pch=20) +graphics::points(TADAProfile$LongitudeMeasure, TADAProfile$LatitudeMeasure, col="red", pch=20) ``` The TADA **InvalidCoordinates** function identifies and flags invalid @@ -340,9 +345,13 @@ flags (if relevant to dataframe): ```{r} TADAProfileClean1 <- InvalidCoordinates(TADAProfile, clean_outsideUSA = "remove", clean_imprecise = TRUE) -#redraw map after all rows with invalid LAT/LONG data are removed -map('county', 'utah') -points(TADAProfileClean1$LongitudeMeasure, TADAProfileClean1$LatitudeMeasure, col="red", pch=20) +# redraw map after all rows with invalid LAT/LONG data are removed +# create new blank window for figure +grDevices::dev.new(width=20, height=10, unit="in") + +# draw map +maps::map('county', 'utah') +graphics::points(TADAProfileClean1$LongitudeMeasure, TADAProfileClean1$LatitudeMeasure, col="red", pch=20) ``` ## Depth unit conversions @@ -379,7 +388,7 @@ See additional function documentation for additional function options by entering the following code in the console: 
?ConvertDepthUnits ```{r} -#converts all depth profile data to meters +# converts all depth profile data to meters TADAProfileClean2 <- ConvertDepthUnits(TADAProfileClean1, unit = "m", transform = TRUE) ``` @@ -394,7 +403,7 @@ See additional function documentation for additional function options by entering the following code in the console: ?ConvertResultUnits ```{r} -#Converts all results to WQX target units +# converts all results to WQX target units TADAProfileClean3 <- ConvertResultUnits(TADAProfileClean2, transform = TRUE) ``` @@ -507,8 +516,11 @@ See documentation for more details: ```{r} TADAProfileClean5 <- InvalidMethod(TADAProfileClean4, clean = TRUE) + TADAProfileClean6 <- InvalidFraction(TADAProfileClean5, clean = TRUE) + TADAProfileClean7 <- InvalidSpeciation(TADAProfileClean6, clean = FALSE) + TADAProfileClean8 <- InvalidResultUnit(TADAProfileClean7, clean = FALSE) ``` @@ -546,6 +558,7 @@ combination. See documentation for more details: ```{r} TADAProfileClean9 <- AboveNationalWQXUpperThreshold(TADAProfileClean8, clean = TRUE) + TADAProfileClean10 <- BelowNationalWQXLowerThreshold(TADAProfileClean9, clean = TRUE) ``` @@ -653,6 +666,10 @@ Here is a list of other fields to review: Other codes may designate suspect data or other flags which may be described in detail in **ResultLaboratoryCommentText** or another column ```{r, fig.width=6, fig.height=2, fig.fullwidth=TRUE} +# create new blank window for figure +grDevices::dev.new(width=20, height=10, unit="in") + +# run filter function FilterFieldReview("ActivityTypeCode", TADAProfileClean13) ``` @@ -675,6 +692,9 @@ we want to remove -- we'll look at the values in the ResultStatusIdentifier field. ```{r, fig.width=6, fig.height=2, fig.fullwidth=TRUE} +# create new blank window for figure +grDevices::dev.new(width=20, height=10, unit="in") + FilterFieldReview("ActivityMediaSubdivisionName", TADAProfileClean14) ``` @@ -715,6 +735,9 @@ Then choose a field from the list. 
In this example we'll remove certain values from the HydrologicEvent field. ```{r, fig.width=6, fig.height=2, fig.fullwidth=TRUE} +# create new blank window for figure +grDevices::dev.new(width=20, height=10, unit="in") + FilterParFieldReview("HydrologicEvent", TADAProfileClean15, "NITROGEN") ``` @@ -783,36 +806,42 @@ UniqueHarmonizationRef <- HarmonizationRefTable(TADAProfileClean16, download = F TADAProfileClean17 <- HarmonizeData(TADAProfileClean16, ref = UniqueHarmonizationRef, transform = TRUE, flag = TRUE) ``` -THE FOLLOWING IS CHARACTERISTIC SPECIFIC +## Characteristic Specific Analyses Below -PLACEHOLDER: Address censored data for a single characteristic +Address censored data for a single characteristic. ```{r} -#How to point to a column in R: yourdataset$thecolumnyouwant -#E.g., temp$ResultMeasureValueNitrogenExample <- filter(TADAProfileClean17, CharacteristicName == "NITROGEN") +# how to point to a column in R: yourdataset$thecolumnyouwant +# e.g., temp$ResultMeasureValueNitrogenExample <- filter(TADAProfileClean17, CharacteristicName == "NITROGEN") SingleCharacteristic <- filter(TADAProfileClean17, CharacteristicName == "TOTAL NITROGEN, MIXED FORMS") -#Review the types of nondetects you have +# review the types of censored data you have in your data frame table(SingleCharacteristic$ResultDetectionConditionText) -#Review number of results that are NA's +# review the number of results that are NA's sum(is.na(SingleCharacteristic$ResultMeasureValue)) -#substitute a value for NA. In this example, 0.02 MG/L is used for TOTAL NITROGEN, MIXED FORMS +# substitute a value for NA. In this example, 0.02 MG/L is used for TOTAL NITROGEN, MIXED FORMS SingleCharacteristic$ResultMeasureValue <- ifelse(is.na(SingleCharacteristic$ResultMeasureValue) == TRUE, 0.02, SingleCharacteristic$ResultMeasureValue) ``` -PLACEHOLDER: Summarize results for a single characteristic +Summarize results for a single characteristic. This currently only works +for Total Nitrogen. 
Updates are coming soon. ```{r} summary(SingleCharacteristic$ResultMeasureValue) -#na.rm = TRUE strips NA values before the mean computation proceeds +# add na.rm = TRUE to the function above to strip NA values before the mean computation proceeds + +# create new blank window for figure +grDevices::dev.new(width=20, height=10, unit="in") + +# create histogram hist(SingleCharacteristic$ResultMeasureValue) ``` -PLACEHOLDER: Create animated national map for a single characteristic +Create animated national map for a single characteristic. This animation +currently only works for Total Nitrogen. Updates are coming soon. ```{r} -#Animation currently only works for Total Nitrogen. Updates coming soon. -CreateAnimatedMap(SingleCharacteristic) +TADA::CreateAnimatedMap(SingleCharacteristic) ``` From efe9da15f62ac664e65615bd7d15ad4e7f96fc4f Mon Sep 17 00:00:00 2001 From: cristinamullin Date: Tue, 31 Jan 2023 21:15:08 -0500 Subject: [PATCH 05/10] Updated CreateAnimatedMap --- DESCRIPTION | 3 +- R/DataDiscoveryRetrieval.R | 27 +++++- R/ResultFlagsIndependent.R | 2 +- R/Visualizations.R | 46 +++++----- man/TADAdataRetrieval.Rd | 24 ++++- vignettes/WQPDataHarmonization.Rmd | 138 ++++++++++++++++++++--------- 6 files changed, 170 insertions(+), 70 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f4a340f26..6877d38e6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,7 +41,8 @@ Imports: stats, gganimate, lubridate, - maps + maps, + usmap Depends: R (>= 3.5.0) Suggests: diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index c251ce134..6787dbfe9 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -28,6 +28,10 @@ #' to the "ResultMeasureValue" and "DetectionLimitMeasureValue" columns; #' and to provide information about the result values that is needed to address #' censored data later on (i.e., nondetections) +#' +#' Users can reference the 
\href{https://www.epa.gov/waterdata/storage-and-retrieval-and-water-quality-exchange-domain-services-and-downloads}{WQX domain tables} +#' to find allowable values for queries, e.g., +#' \href{https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip}{countycode and statecode} #' #' See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information. #' @@ -46,12 +50,29 @@ #' #' @export #' -#' @examples +#' @examples #' \dontrun{ +#' #' tada1 <- TADAdataRetrieval(statecode = "WI", -#' countycode = "Dane", -#' characteristicName = "Phosphorus") +#' countycode = "Dane", +#' characteristicName = "Phosphorus") +#' +#' tada2 <- TADAdataRetrieval(ProjectIdentifier = "Anchorage Bacteria 20-21") +#' +#' tada3 <- TADAdataRetrieval(statecode = "UT", +#' characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), +#' startDate = "10-01-2020") +#' +#' # Users can reference the WQX domain table to find countycode and statecode +#' # https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip +#' test4 <- TADAdataRetrieval(statecode = "SC", countycode = "Abbeville") +#' +#' # countycode queries require a statecode +#' tada5 <- TADAdataRetrieval(countycode = "US:02:020") +#' #' } +#' + TADAdataRetrieval <- function(statecode = "null", startDate = "null", countycode = "null", diff --git a/R/ResultFlagsIndependent.R b/R/ResultFlagsIndependent.R index 859342fe6..15779ec41 100644 --- a/R/ResultFlagsIndependent.R +++ b/R/ResultFlagsIndependent.R @@ -1043,7 +1043,7 @@ InvalidCoordinates <- function(.data, # if clean_outsideUSA is "change sign", change the sign of lat/long coordinates outside of USA if (clean_outsideUSA == "change sign") { - print("Note: This is a temporary solution. Data owner should fix the raw data to address invalid coordinates through WQX. For assistance, email the WQX helpdesk (WQX@epa.gov).") + print("Note: When clean_outsideUSA == change sign, the sign of lat/long coordinates flagged as outside of USA are switched. This is a temporary solution. 
Data owners should fix the raw data to address invalid coordinates through WQX. For assistance fixing data errors you see in the WQP, email the WQX helpdesk (WQX@epa.gov).") .data <- .data %>% dplyr::mutate( LatitudeMeasure = dplyr::case_when( diff --git a/R/Visualizations.R b/R/Visualizations.R index 69ab88b8a..79b06c14a 100644 --- a/R/Visualizations.R +++ b/R/Visualizations.R @@ -13,6 +13,7 @@ CreateAnimatedMap <- function(.data) { # code adapted from USGS blog: https://waterdata.usgs.gov/blog/large_sample_pull/ + # reference: https://cran.r-project.org/web/packages/usmap/vignettes/advanced-mapping.html # pull the year from the date .data$year <- base::format(as.Date(.data$ActivityStartDate, format="%Y-%m-%d"),"%Y") @@ -22,42 +23,45 @@ CreateAnimatedMap <- function(.data) { .data %>% dplyr::group_by(MonitoringLocationIdentifier, LatitudeMeasure, LongitudeMeasure, year) %>% dplyr::summarize(mean = mean(.data$ResultMeasureValue, na.rm = TRUE), - median = stats::median(.data$ResultMeasureValue, na.rm = TRUE)) + median = stats::median(.data$ResultMeasureValue, na.rm = TRUE)) # create a new character column with total nitrogen acceptable range designations n_bysite <- n_bysite %>% dplyr::mutate(TN_mean= - dplyr::case_when(mean<2 ~ "<2 mg/l", - mean>=2 & mean<=6 ~ "2-6 mg/l", - mean>6 ~ ">6 mg/l")) - + dplyr::case_when(mean<2 ~ "<2 mg/l", + mean>=2 & mean<=6 ~ "2-6 mg/l", + mean>6 ~ ">6 mg/l")) + # convert latitude, longitude, and year data to numeric form n_bysite$LatitudeMeasure <- as.numeric(n_bysite$LatitudeMeasure) n_bysite$LongitudeMeasure <- as.numeric(n_bysite$LongitudeMeasure) n_bysite$year <- as.numeric(n_bysite$year) - # first, create the base map data frame - all_state <- "usa" - usa <- map_data("state", interior=TRUE) - base_map <- ggplot2::ggplot(data = usa, mapping = aes(x = long, - y = lat, - group = group)) + - ggplot2::geom_polygon(color = "black", fill = "white") + - ggplot2::coord_quickmap() + - ggplot2::theme_void() + # plot the base map and add 
data to it + base_map <- + map_with_data <- usmap::plot_usmap("counties", include = "AK", labels = FALSE) + + ggplot2::geom_point(data = usmap::usmap_transform(n_bysite, + input_names = c("LongitudeMeasure", "LatitudeMeasure"), + output_names = c("x", "y")), + aes(x = x, + y = y), + color = "black", fill = "white") - # second, plot the base map and add data to it map_with_data <- base_map + - ggplot2::geom_point(data = n_bysite, aes(x = LongitudeMeasure, - y = LatitudeMeasure, - color = TN_mean, - group = year, - frame = year)) + + ggplot2::geom_point(data = usmap::usmap_transform(n_bysite, + input_names = c("LongitudeMeasure", "LatitudeMeasure"), + output_names = c("x", "y")), + aes(x = x, + y = y, + color = TN_mean, + group = year, + frame = year)) + gganimate::transition_time(year) + ggplot2::ggtitle('Year: {frame_time}', # add year to the title - subtitle = 'Frame {frame} of {nframes}') + + subtitle = 'Frame {frame} of {nframes}') + ggplot2::scale_colour_manual(values = c("blue", "red", "green")) + num_years <- max(n_bysite$year)-min(n_bysite$year) + 1 # lastly, run the animation diff --git a/man/TADAdataRetrieval.Rd b/man/TADAdataRetrieval.Rd index e93d9277b..0ad997feb 100644 --- a/man/TADAdataRetrieval.Rd +++ b/man/TADAdataRetrieval.Rd @@ -72,12 +72,32 @@ to the "ResultMeasureValue" and "DetectionLimitMeasureValue" columns; and to provide information about the result values that is needed to address censored data later on (i.e., nondetections) +Users can reference the \href{https://www.epa.gov/waterdata/storage-and-retrieval-and-water-quality-exchange-domain-services-and-downloads}{WQX domain tables} +to find allowable vales for queries, e.g., +\href{https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip}{countycode and statecode} + See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information. 
} \examples{ \dontrun{ + tada1 <- TADAdataRetrieval(statecode = "WI", - countycode = "Dane", - characteristicName = "Phosphorus") +countycode = "Dane", +characteristicName = "Phosphorus") + +tada2 <- TADAdataRetrieval(ProjectIdentifier = "Anchorage Bacteria 20-21") + +tada3 <- TADAdataRetrieval(statecode = "UT", + characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), + startDate = "10-01-2020") + +# Users can reference the WQX domain table to find countycode and statecode +# https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip +test4 <- TADAdataRetrieval(statecode = "SC", countycode = "Abbeville") + +# countycode queries require a statecode +tada5 <- TADAdataRetrieval(countycode = "US:02:020") + } + } diff --git a/vignettes/WQPDataHarmonization.Rmd b/vignettes/WQPDataHarmonization.Rmd index 8cad07416..6d62cea95 100644 --- a/vignettes/WQPDataHarmonization.Rmd +++ b/vignettes/WQPDataHarmonization.Rmd @@ -36,7 +36,7 @@ The following code will also install any packages you do not have, and load all packages required to run this vignette into your R session. ```{r, results = 'hide', message = FALSE, warning = FALSE} -list.of.packages <- c("plyr", "data.table", "dataRetrieval", "dplyr", "ggplot2", "grDevices", "magrittr", "stringr", "utils", "RColorBrewer", "stats", "remotes", "gganimate", "gifski", "maps") +list.of.packages <- c("plyr", "data.table", "dataRetrieval", "dplyr", "ggplot2", "grDevices", "magrittr", "stringr", "utils", "RColorBrewer", "stats", "remotes", "gganimate", "gifski", "maps", "usmap") new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] @@ -85,6 +85,7 @@ library(dataRetrieval) library(gganimate) library(gifski) library(maps) +library(usmap) library(TADA) ``` @@ -216,13 +217,30 @@ Additional resources: Option 1: Use the TADAdataRetrieval function. + +Uncomment below if you are interested in reviewing differences between the profiles you would get using readWQPdata vs. TADAdataRetrieval. 
The profiles are different because TADAdataRetrieval automatically joins in other data from different WQP profiles, and does some additional data cleaning as part of the data retrieval process. + ```{r} -# uncomment below if you would like to review differences between the profiles you would get using readWQPdata vs. TADAdataRetrieval. The profiles are different because TADAdataRetrieval automatically joins in other data from different WQP profiles, and does some additional data cleaning as part of the data retrieval process. +# dataRetrievalProfile <- dataRetrieval::readWQPdata(statecode = "AK", characteristicName = c("Fecal Coliform", "Escherichia coli", "Enterococcus"), startDate = "05-01-2018", ignore_attributes = TRUE) +#another example # dataRetrievalProfile <- dataRetrieval::readWQPdata(statecode = "UT", characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), startDate = "01-01-2021", ignore_attributes = TRUE) -# You can edit the line below to define your own WQP query inputs -TADAProfile <- TADAdataRetrieval(statecode = "UT", characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), startDate = "10-01-2020") +``` + +# Use the code below to download data from the WQP using TADAdataRetrieval + +Edit the code chunk below to define your own WQP query inputs + +```{r} +# here is an example for Utah +# TADAProfile <- TADAdataRetrieval(statecode = "UT", characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), startDate = "10-01-2020") + +# example for Alaska +# TADAProfile <- TADAdataRetrieval(ProjectIdentifier = "Anchorage Bacteria 20-21") + +# another example query for Alaska. 
We will move forward with this example in the remainder of the vignette +TADAProfile <- TADAdataRetrieval(statecode = "AK", characteristicName = c("Fecal Coliform", "Escherichia coli", "Enterococcus", "Ammonia", "Nitrate", "Nitrogen"), startDate = "05-01-2018") ``` @@ -230,6 +248,8 @@ Option 2: Alternatively, you can use the data.table::fread function to read in a web service call for any WQP profile (un-comment). ```{r} +# example data retrieval pull for Utah + # New_Draft_fullphyschem <- data.table::fread("https://www.waterqualitydata.us/data/Result/search?countrycode=US&statecode=US%3A49&siteid=UTAHDWQ_WQX-4925610&startDateLo=01-01-2015&startDateHi=12-31-2016&mimeType=csv&zip=no&sorted=yes&dataProfile=fullPhysChem&providers=NWIS&providers=STEWARDS&providers=STORET") ``` @@ -276,27 +296,12 @@ TADAProfile_CharSummary <- SummarizeCharacteristics(TADAProfile) Review station locations ```{r} -# create new blank window for figure -grDevices::dev.new(width=20, height=10, unit="in") - -# create a map of the world +# create an empty map of the world maps::map() -# uncomment below to create USA base map instead -#maps::map("usa") - -# uncomment below to add state outlines to USA map -#maps::map("state", add=TRUE) +# draw the sites included in your TADAProfile onto the map +data = points(TADAProfile$LongitudeMeasure, TADAProfile$LatitudeMeasure, col="red", pch=20) -# uncomment below to add county outlines to USA map -#maps::map("county", add=TRUE) - - -# uncomment below to draw map of a specific state instead, with county lines -# maps::map('county', 'utah') - -# draw the site locations onto the map -graphics::points(TADAProfile$LongitudeMeasure, TADAProfile$LatitudeMeasure, col="red", pch=20) ``` The TADA **InvalidCoordinates** function identifies and flags invalid @@ -343,15 +348,64 @@ flags (if relevant to dataframe): as "Imprecise". 
```{r} -TADAProfileClean1 <- InvalidCoordinates(TADAProfile, clean_outsideUSA = "remove", clean_imprecise = TRUE) - -# redraw map after all rows with invalid LAT/LONG data are removed -# create new blank window for figure -grDevices::dev.new(width=20, height=10, unit="in") +InvalidCoordinateFlags <- InvalidCoordinates(TADAProfile, clean_outsideUSA = "no", clean_imprecise = FALSE, errorsonly = TRUE) + +# review unique flags in InvalidCoordinateFlags +unique(InvalidCoordinateFlags$TADA.InvalidCoordinates) + +# review unique MonitoringLocationIdentifiers in your flag dataframe +unique(InvalidCoordinateFlags$MonitoringLocationIdentifier) + +Unique_InvalidCoordinateFlags <- InvalidCoordinateFlags %>% + select('MonitoringLocationIdentifier','MonitoringLocationName', 'TADA.InvalidCoordinates', + 'OrganizationIdentifier', 'LongitudeMeasure', 'LatitudeMeasure', 'MonitoringLocationTypeName', + 'CountryCode', 'StateCode', 'CountyCode', 'HUCEightDigitCode', 'MonitoringLocationDescriptionText', + 'ProjectName', 'ProjectIdentifier', 'OrganizationFormalName') %>% + distinct() + +# remove all data for sites with invalid or imprecise coordinates. Change function inputs to keep this data, but be aware it may impact your mapping. 
+TADAProfileClean1 <- InvalidCoordinates(TADAProfile, clean_outsideUSA = "remove", clean_imprecise = TRUE, errorsonly = FALSE) + +# redraw map (now all rows with invalid LAT/LONG data have been removed) + +# plot map of US +usmap::plot_usmap() + + geom_point(data = usmap_transform(TADAProfileClean1, + input_names = c("LongitudeMeasure", "LatitudeMeasure"), + output_names = c("x", "y")), + aes(x = x, y = y), + color = "red", alpha = 0.25) + + labs(title = "US Map", + subtitle = "These are the sites in TADAProfile1") + + theme(legend.position = "right") + +# plot map of state +usmap::plot_usmap("counties", include = "AK", labels = FALSE) + + geom_point(data = usmap_transform(TADAProfileClean1, + input_names = c("LongitudeMeasure", "LatitudeMeasure"), + output_names = c("x", "y")), + aes(x = x, y = y), + color = "red", alpha = 0.25) + + labs(title = "Alaska", + subtitle = "These are the sites in TADAProfile1") + +# plot map of county +# first, find unique county codes in your dataframe +unique(TADAProfile$CountyCode) +unique(TADAProfile$StateCode) + +# county code domain list for WQP: https://www.waterqualitydata.us/Codes/countycode +# filter map down to the counties in your dataframe. Here, the state and county codes translate to "02020". 
+ +usmap::plot_usmap("counties", include = c("02122", "02090", "02130", "02270", "02170", "02110", "02282", "02275", "02198", "02185", "02261", "02195", "02105","02020", "02100", "02050", "02220", "02150", "02180", "02016", "02230", "02240", "02290", "02188"), labels = FALSE) + + geom_point(data = usmap_transform(TADAProfileClean1, + input_names = c("LongitudeMeasure", "LatitudeMeasure"), + output_names = c("x", "y")), + aes(x = x, y = y), + color = "red", alpha = 0.25) + + labs(title = "Alaska", + subtitle = "These are the sites in TADAProfile1") -# draw map -maps::map('county', 'utah') -graphics::points(TADAProfileClean1$LongitudeMeasure, TADAProfileClean1$LatitudeMeasure, col="red", pch=20) ``` ## Depth unit conversions @@ -666,8 +720,8 @@ Here is a list of other fields to review: Other codes may designate suspect data or other flags which may be described in detail in **ResultLaboratoryCommentText** or another column ```{r, fig.width=6, fig.height=2, fig.fullwidth=TRUE} -# create new blank window for figure -grDevices::dev.new(width=20, height=10, unit="in") +# uncomment to create new blank window for figure +# grDevices::dev.new(width=20, height=10, unit="in") # run filter function FilterFieldReview("ActivityTypeCode", TADAProfileClean13) @@ -692,8 +746,8 @@ we want to remove -- we'll look at the values in the ResultStatusIdentifier field. ```{r, fig.width=6, fig.height=2, fig.fullwidth=TRUE} -# create new blank window for figure -grDevices::dev.new(width=20, height=10, unit="in") +# uncomment to create new blank window for figure +# grDevices::dev.new(width=20, height=10, unit="in") FilterFieldReview("ActivityMediaSubdivisionName", TADAProfileClean14) ``` @@ -735,8 +789,8 @@ Then choose a field from the list. In this example we'll remove certain values from the HydrologicEvent field. 
```{r, fig.width=6, fig.height=2, fig.fullwidth=TRUE} -# create new blank window for figure -grDevices::dev.new(width=20, height=10, unit="in") +# uncomment to create new blank window for figure +# grDevices::dev.new(width=20, height=10, unit="in") FilterParFieldReview("HydrologicEvent", TADAProfileClean15, "NITROGEN") ``` @@ -813,7 +867,7 @@ Address censored data for a single characteristic. ```{r} # how to point to a column in R: yourdataset$thecolumnyouwant # e.g., temp$ResultMeasureValueNitrogenExample <- filter(TADAProfileClean17, CharacteristicName == "NITROGEN") -SingleCharacteristic <- filter(TADAProfileClean17, CharacteristicName == "TOTAL NITROGEN, MIXED FORMS") +SingleCharacteristic <- filter(TADAProfileClean17, CharacteristicName == "NITROGEN") # review the types of censored data you have in your data frame table(SingleCharacteristic$ResultDetectionConditionText) @@ -821,19 +875,19 @@ table(SingleCharacteristic$ResultDetectionConditionText) # review the number of results that are NA's sum(is.na(SingleCharacteristic$ResultMeasureValue)) -# substitute a value for NA. In this example, 0.02 MG/L is used for TOTAL NITROGEN, MIXED FORMS +# substitute a value for NA. In this example, 0.02 MG/L is used for NITROGEN SingleCharacteristic$ResultMeasureValue <- ifelse(is.na(SingleCharacteristic$ResultMeasureValue) == TRUE, 0.02, SingleCharacteristic$ResultMeasureValue) ``` Summarize results for a single characteristic. This currently only works -for Total Nitrogen. Updates are coming soon. +for Total Nitrogen and Nitrogen. Updates are coming soon. 
```{r} -summary(SingleCharacteristic$ResultMeasureValue) +summary(SingleCharacteristic$ResultMeasureValue, na.rm = TRUE) # add na.rm = TRUE to the function above to strip NA values before the mean computation proceeds -# create new blank window for figure -grDevices::dev.new(width=20, height=10, unit="in") +# uncomment to create new blank window for figure +# grDevices::dev.new(width=20, height=10, unit="in") # creat histogram hist(SingleCharacteristic$ResultMeasureValue) From 2c35052bbcbac2f6bababd34503cec8760ca900a Mon Sep 17 00:00:00 2001 From: Cristina Mullin <46969696+cristinamullin@users.noreply.github.com> Date: Wed, 1 Feb 2023 08:18:05 -0500 Subject: [PATCH 06/10] Update DataDiscoveryRetrieval.R --- R/DataDiscoveryRetrieval.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 6787dbfe9..4bcc7ece9 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -30,8 +30,7 @@ #' censored data later on (i.e., nondetections) #' #' Users can reference the \href{https://www.epa.gov/waterdata/storage-and-retrieval-and-water-quality-exchange-domain-services-and-downloads}{WQX domain tables} -#' to find allowable vales for queries, e.g., -#' \href{https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip}{countycode and statecode} +#' to find allowable vales for queries, e.g., https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip #' #' See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information. 
#'
@@ -513,4 +512,4 @@ JoinWQPProfiles <- function(FullPhysChem = "null",
   }else{join3 <- join2}
 
   return(join3)
-}
\ No newline at end of file
+}

From c27ca60f98fbee6124ef51c002da8a1b648e2650 Mon Sep 17 00:00:00 2001
From: Cristina Mullin <46969696+cristinamullin@users.noreply.github.com>
Date: Wed, 1 Feb 2023 08:20:04 -0500
Subject: [PATCH 07/10] Update DataDiscoveryRetrieval.R

---
 R/DataDiscoveryRetrieval.R | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R
index 4bcc7ece9..6106c0496 100644
--- a/R/DataDiscoveryRetrieval.R
+++ b/R/DataDiscoveryRetrieval.R
@@ -30,8 +30,8 @@
 #' censored data later on (i.e., nondetections)
 #'
 #' Users can reference the \href{https://www.epa.gov/waterdata/storage-and-retrieval-and-water-quality-exchange-domain-services-and-downloads}{WQX domain tables}
-#' to find allowable vales for queries, e.g., https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip
-#'
+#' to find allowable values for queries, e.g., reference the WQX domain table to find countycode and statecode: https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip
+#' 
 #' See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information. 
#' #' @param statecode Code that identifies a state @@ -62,8 +62,7 @@ #' characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), #' startDate = "10-01-2020") #' -#' # Users can reference the WQX domain table to find countycode and statecode -#' # https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip + #' test4 <- TADAdataRetrieval(statecode = "SC", countycode = "Abbeville") #' #' # countycode queries require a statecode From 4e2c2c91d8308a0582d814986c363031ab25ae8a Mon Sep 17 00:00:00 2001 From: Cristina Mullin <46969696+cristinamullin@users.noreply.github.com> Date: Wed, 1 Feb 2023 08:21:34 -0500 Subject: [PATCH 08/10] Update DataDiscoveryRetrieval.R From 787a853254df8d28ab1215f9030d3be7f77c0e77 Mon Sep 17 00:00:00 2001 From: Mullin Date: Wed, 1 Feb 2023 10:58:54 -0500 Subject: [PATCH 09/10] update dependencies added maptools to vignette --- man/TADAdataRetrieval.Rd | 5 +---- vignettes/WQPDataHarmonization.Rmd | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/man/TADAdataRetrieval.Rd b/man/TADAdataRetrieval.Rd index 0ad997feb..b4239d950 100644 --- a/man/TADAdataRetrieval.Rd +++ b/man/TADAdataRetrieval.Rd @@ -73,8 +73,7 @@ and to provide information about the result values that is needed to address censored data later on (i.e., nondetections) Users can reference the \href{https://www.epa.gov/waterdata/storage-and-retrieval-and-water-quality-exchange-domain-services-and-downloads}{WQX domain tables} -to find allowable vales for queries, e.g., -\href{https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip}{countycode and statecode} +to find allowable vales for queries, e.g., reference the WQX domain table to find countycode and statecode: https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information. 
} @@ -91,8 +90,6 @@ tada3 <- TADAdataRetrieval(statecode = "UT", characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), startDate = "10-01-2020") -# Users can reference the WQX domain table to find countycode and statecode -# https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip test4 <- TADAdataRetrieval(statecode = "SC", countycode = "Abbeville") # countycode queries require a statecode diff --git a/vignettes/WQPDataHarmonization.Rmd b/vignettes/WQPDataHarmonization.Rmd index 6d62cea95..0d9c96b66 100644 --- a/vignettes/WQPDataHarmonization.Rmd +++ b/vignettes/WQPDataHarmonization.Rmd @@ -36,7 +36,7 @@ The following code will also install any packages you do not have, and load all packages required to run this vignette into your R session. ```{r, results = 'hide', message = FALSE, warning = FALSE} -list.of.packages <- c("plyr", "data.table", "dataRetrieval", "dplyr", "ggplot2", "grDevices", "magrittr", "stringr", "utils", "RColorBrewer", "stats", "remotes", "gganimate", "gifski", "maps", "usmap") +list.of.packages <- c("plyr", "data.table", "dataRetrieval", "dplyr", "ggplot2", "grDevices", "magrittr", "stringr", "utils", "RColorBrewer", "stats", "remotes", "gganimate", "gifski", "maps", "usmap", "maptools") new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] From a53448bbe1ff03d8321ccf4b6b5d9ec6a21dddb8 Mon Sep 17 00:00:00 2001 From: Mullin Date: Wed, 1 Feb 2023 14:51:51 -0500 Subject: [PATCH 10/10] update description --- DESCRIPTION | 9 ++++++++- vignettes/WQPDataHarmonization.Rmd | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6877d38e6..3f57913de 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,6 +17,12 @@ Authors@R: role = "aut"), person(given = "Laura", family = "Shumway", + role = "aut"), + person(given = "Elise", + family = "Hinman", + role = "aut"), + person(given = "Kathleen", + family = "Healy", role = "aut") ) Description: Assists data partners in 
performing automated assessments with @@ -40,14 +46,15 @@ Imports: RColorBrewer, stats, gganimate, + gifski, lubridate, maps, + maptools, usmap Depends: R (>= 3.5.0) Suggests: readr, - gifski, rlang, remotes, tidyverse, diff --git a/vignettes/WQPDataHarmonization.Rmd b/vignettes/WQPDataHarmonization.Rmd index 0d9c96b66..913b7cfe6 100644 --- a/vignettes/WQPDataHarmonization.Rmd +++ b/vignettes/WQPDataHarmonization.Rmd @@ -36,7 +36,7 @@ The following code will also install any packages you do not have, and load all packages required to run this vignette into your R session. ```{r, results = 'hide', message = FALSE, warning = FALSE} -list.of.packages <- c("plyr", "data.table", "dataRetrieval", "dplyr", "ggplot2", "grDevices", "magrittr", "stringr", "utils", "RColorBrewer", "stats", "remotes", "gganimate", "gifski", "maps", "usmap", "maptools") +list.of.packages <- c("plyr", "data.table", "dataRetrieval", "dplyr", "ggplot2", "grDevices", "magrittr", "stringr", "utils", "RColorBrewer", "stats", "remotes", "gganimate", "gifski", "maps", "usmap", "maptools", "xfun") new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] @@ -86,6 +86,7 @@ library(gganimate) library(gifski) library(maps) library(usmap) +library(maptools) library(TADA) ```