USEPA · cristinamullin · Feb 1, 2023 · Jan 30, 2023 · Jan 30, 2023 · Jan 31, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -17,6 +17,12 @@ Authors@R:
            role = "aut"),
     person(given = "Laura",
            family = "Shumway",
+           role = "aut"),
+        person(given = "Elise",
+           family = "Hinman",
+           role = "aut"),
+    person(given = "Kathleen",
+           family = "Healy",
            role = "aut")
     )
 Description: Assists data partners in performing automated assessments with
@@ -40,13 +46,15 @@ Imports:
     RColorBrewer,
     stats, 
     gganimate,
+    gifski,
     lubridate,
-    maps
+    maps, 
+    maptools, 
+    usmap
 Depends: 
     R (>= 3.5.0)
 Suggests: 
     readr,
-    gifski,
     rlang,
     remotes, 
     tidyverse,

diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R
@@ -28,7 +28,10 @@
 #' to the "ResultMeasureValue" and "DetectionLimitMeasureValue" columns; 
 #' and to provide information about the result values that is needed to address
 #' censored data later on (i.e., nondetections)
-#'  
+#' 
+#' Users can reference the \href{https://www.epa.gov/waterdata/storage-and-retrieval-and-water-quality-exchange-domain-services-and-downloads}{WQX domain tables}
+#' to find allowable vales for queries, e.g., reference the WQX domain table to find countycode and statecode: https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip
+#'   
 #' See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information.
 #' 
 #' @param statecode Code that identifies a state
@@ -46,12 +49,28 @@
 #' 
 #' @export
 #'
-#' @examples 
+#' @examples
 #' \dontrun{
+#' 
 #' tada1 <- TADAdataRetrieval(statecode = "WI",
-#'                            countycode = "Dane",
-#'                            characteristicName = "Phosphorus")
+#' countycode = "Dane",
+#' characteristicName = "Phosphorus")
+#' 
+#' tada2 <- TADAdataRetrieval(ProjectIdentifier = "Anchorage Bacteria 20-21")
+#' 
+#' tada3 <- TADAdataRetrieval(statecode = "UT", 
+#'                            characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), 
+#'                            startDate = "10-01-2020")
+#' 
+
+#' test4 <- TADAdataRetrieval(statecode = "SC", countycode  = "Abbeville")
+#' 
+#' # countycode queries require a statecode
+#' tada5 <- TADAdataRetrieval(countycode = "US:02:020")
+#' 
 #' }
+#' 
+
 TADAdataRetrieval <- function(statecode = "null",
                               startDate = "null",
                               countycode = "null", 
@@ -245,9 +264,14 @@ TADAReadWQPWebServices <- function(webservice) {
 #' "TADA.DetectionLimitMeasureValue.Flag" are created to track and changes made
 #' to the "ResultMeasureValue" and "DetectionLimitMeasureValue" columns; 
 #' and to provide information about the result values that is needed to address
-#'  censored data later on (i.e., nondetections)
-#'  
+#' censored data later on (i.e., nondetections)
+#' 
+#' Some code for this function was adapted from this USGS Blog (Author: Aliesha Krall)
+#' \href{https://waterdata.usgs.gov/blog/large_sample_pull/}{Large Sample Pull}  
+#' 
 #' See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information.
+#' 
+#'
 #'
 #' @param startDate Start Date YYYY-MM-DD format, for example, "1995-01-01"
 #' @param endDate end date in YYYY-MM-DD format, for example, "2020-12-31"
@@ -264,62 +288,76 @@ TADAReadWQPWebServices <- function(webservice) {
 #' tada2 <- TADABigdataRetrieval(startDate = "2019-01-01", endDate = "2021-12-31", characteristicName = "Temperature, water", siteType = "Stream")
 #' 
 #' tada3 <- TADABigdataRetrieval(characteristicName = "Phosphorus")
-#' 
-#' tada3 <- TADABigdataRetrieval(statecode = "CT")
+#'
 #' }
 #' 
 
 
 TADABigdataRetrieval <- function(startDate = "null",
                               endDate = "null",
-                              statecode = character(0),
+                              statecode = "null",
                               characteristicName = "null", 
                               siteType = "null"
 ) {
+
+  if(!startDate=="null"){
+    startDate_Low = lubridate::ymd(startDate)
+    startYearLo = lubridate::year(startDate_Low)
+  }else{ # else: pick a date before which any data are unlikely to be in WQP
+    startDate = "1800-01-01"
+    startDate_Low = lubridate::ymd(startDate)
+    startYearLo = lubridate::year(startDate_Low)
+  } 
 
-  startDate_Low = lubridate::ymd(startDate)
-  startYearLo = lubridate::year(startDate_Low)
-
-  endDate_High = lubridate::ymd(endDate)
-  startYearHi = lubridate::year(endDate_High)
+# Logic: if the input endDate is not null, convert to date and obtain year
+  # for summary
+  if(!endDate=="null"){
+    endDate_High = lubridate::ymd(endDate)
+    endYearHi = lubridate::year(endDate_High)
+  }else{ # Else, if not populated, default to using today's date/year for summary
+    endDate = Sys.Date()
+    endDate_High = lubridate::ymd(endDate)
+    endYearHi = lubridate::year(endDate_High)
+  }
 
+  # Create WQPsummary query
+  WQPquery <- list()
   if (length(characteristicName)>1) {
-    characteristicName = list(characteristicName) 
+    WQPquery = c(WQPquery,characteristicName = list(characteristicName)) 
   } else if (characteristicName != "null") {
-    characteristicName = characteristicName
+    WQPquery = c(WQPquery,characteristicName = characteristicName)
   }
-
   if (length(siteType)>1) {
-    siteType = list(siteType)
+    WQPquery = c(WQPquery,siteType = list(siteType)) 
   } else if (siteType != "null") {
-    siteType = siteType
+    WQPquery = c(WQPquery,siteType = siteType)
   }
-
-  state_cd_cont = utils::read.csv(file = "inst/extdata/statecode.csv")
-
-  if(length(statecode)>0){
+
+  if (!statecode=="null") {
+    state_cd_cont = utils::read.csv(file = "inst/extdata/statecode.csv")
     statecode = as.character(statecode)
     state_cd_cont = state_cd_cont%>%filter(STUSAB%in%statecode)
+    statecd = state_cd_cont$STATE
     if(nrow(state_cd_cont)==0){stop("State code is not valid. Check FIPS state/territory abbreviations.")}
+    if(length(statecode)>1){
+      WQPquery = c(WQPquery, statecode=list(statecd))
+    }else{WQPquery = c(WQPquery, statecode=statecd)}
   }
 
-  for(i in seq_len(nrow(state_cd_cont))){
-
-    state_cd = as.numeric(state_cd_cont$STATE[i])
-    state_nm = state_cd_cont$STUSAB[i]
+  df_summary = dataRetrieval::readWQPsummary(WQPquery)
+
     ## NOTE: if query brings back no results, function returns empty 
     # dataRetrieval profile, not empty summary
-    df_summary = dataRetrieval::readWQPsummary(statecode = state_cd,
-                     characteristicName = characteristicName, 
-                     siteType = siteType)
     if(nrow(df_summary)>0){
       sites = df_summary %>%
         dplyr::filter(YearSummarized >= startYearLo,
-                    YearSummarized <= startYearHi)
+                      YearSummarized <= endYearHi)
+
       siteid_all = unique(sites$MonitoringLocationIdentifier)
+      rm(df_summary) # save some space
 
       if(length(siteid_all) > 0) {
-        #print(paste0("Grabbing ",state_nm," data from ",length(siteid_all)," sites."))
+        rm(sites) # save some space
         l=length(siteid_all)  #len(sites)
         maxsites=100   #max number of sites pulled per WQP query
         #may want to consider using the total number of records in a given 
@@ -371,60 +409,16 @@ TADABigdataRetrieval <- function(startDate = "null",
           df = dplyr::bind_rows(df, joins)
         }
       }else{
-        joins = data.frame()
-        # print(paste0(state_nm, " returned no data."))
-        }
-
-      if(nrow(df) > 0){
-
-      #####
-      #need to edit below if temporary rds files do not go away
-      #may be able to delete below
-      #https://stackoverflow.com/questions/47626331/saving-and-retrieving-temp-files-in-r-packages
-      #####
-
-      #original
-      #saveRDS(df_state, file = paste0(state_nm, "_raw_data.rds"))
-
-        tempfilename = paste0(state_nm, "_raw_data.rds")
-        file.path(tempdir(), saveRDS(df, file = paste0("inst/tempdata/", tempfilename)))
-
-        }
-      } #else{print(paste0(state_nm, " had no data."))}
-  }
-    all_data <- data.frame()
-    stdir = list.files("inst/tempdata/")
-
-    for(k in 1:length(stdir)){
-      path = paste0("inst/tempdata/",stdir[k])
-      allstates_df <- tryCatch({
-        #####
-        #need to edit line below if rds files do not go away
-        #####
-
-        #original below
-       #readRDS(paste0(state, "_raw_data.rds"))
-
-        readRDS(path)
-      })
-      unlink(path)
-
-      if(nrow(allstates_df) > 0){
-        allstates_df$ResultMeasureValue = as.character(allstates_df$ResultMeasureValue)
-        allstates_df$HorizontalAccuracyMeasure.MeasureValue = as.character(allstates_df$HorizontalAccuracyMeasure.MeasureValue)
-        allstates_df$ActivityDepthHeightMeasure.MeasureValue = as.character(allstates_df$ActivityDepthHeightMeasure.MeasureValue)
-        allstates_df$DetectionQuantitationLimitMeasure.MeasureValue = as.character(allstates_df$DetectionQuantitationLimitMeasure.MeasureValue)
-        all_data <- dplyr::bind_rows(all_data, allstates_df)
+        warning("Query returned no data. Function returns an empty dataframe.")
+        return(sites)
       }
+    }else{
+      warning("Query returned no data. Function returns an empty dataframe.")
+      return(df_summary)
+}
 
-    } 
-
-    # # Do not need if date input works in dataRetrieval functions
-    # finalprofile = all_data %>%
-    #   dplyr::filter(ActivityStartDate <= endDate,
-    #                 ActivityStartDate >= startDate)
 
-    finalprofile = autoclean(all_data)
+    finalprofile = autoclean(df)
     #not sure if above is working correctly, thousands of "duplicated" rows are removed
     # you will still need to filter on activity media subdivision now
 
@@ -517,4 +511,4 @@ JoinWQPProfiles <- function(FullPhysChem = "null",
 
   }else{join3 <- join2}
   return(join3)
-}
+}
diff --git a/R/ResultFlagsIndependent.R b/R/ResultFlagsIndependent.R
@@ -1043,7 +1043,7 @@ InvalidCoordinates <- function(.data,
 
   # if clean_outsideUSA is "change sign", change the sign of lat/long coordinates outside of USA
   if (clean_outsideUSA == "change sign") {
-    print("Note: This is a temporary solution. Data owner should fix the raw data to address invalid coordinates through WQX. For assistance, email the WQX helpdesk ([email protected]).")
+    print("Note: When clean_outsideUSA == change sign, the sign of lat/long coordinates flagged as outside of USA are switched. This is a temporary solution. Data owners should fix the raw data to address invalid coordinates through WQX. For assistance fixing data errors you see in the WQP, email the WQX helpdesk ([email protected]).")
     .data <- .data %>% 
       dplyr::mutate(
         LatitudeMeasure = dplyr::case_when(

diff --git a/R/Visualizations.R b/R/Visualizations.R
@@ -13,6 +13,7 @@
 CreateAnimatedMap <- function(.data) { 
 
   # code adapted from USGS blog: https://waterdata.usgs.gov/blog/large_sample_pull/
+  # reference: https://cran.r-project.org/web/packages/usmap/vignettes/advanced-mapping.html
 
   # pull the year from the date
   .data$year <- base::format(as.Date(.data$ActivityStartDate, format="%Y-%m-%d"),"%Y")
@@ -22,42 +23,45 @@ CreateAnimatedMap <- function(.data) {
     .data %>% 
     dplyr::group_by(MonitoringLocationIdentifier, LatitudeMeasure, LongitudeMeasure, year) %>% 
     dplyr::summarize(mean = mean(.data$ResultMeasureValue, na.rm = TRUE), 
-              median = stats::median(.data$ResultMeasureValue, na.rm = TRUE))
+                     median = stats::median(.data$ResultMeasureValue, na.rm = TRUE))
 
   # create a new character column with total nitrogen acceptable range designations
   n_bysite <- 
     n_bysite %>% dplyr::mutate(TN_mean=
-                               dplyr::case_when(mean<2 ~ "<2 mg/l", 
-                               mean>=2 & mean<=6 ~ "2-6 mg/l", 
-                               mean>6 ~ ">6 mg/l")) 
-
+                                 dplyr::case_when(mean<2 ~ "<2 mg/l", 
+                                                  mean>=2 & mean<=6 ~ "2-6 mg/l", 
+                                                  mean>6 ~ ">6 mg/l")) 
+  
   # convert latitude, longitude, and year data to numeric form
   n_bysite$LatitudeMeasure <- as.numeric(n_bysite$LatitudeMeasure)
   n_bysite$LongitudeMeasure <- as.numeric(n_bysite$LongitudeMeasure)
   n_bysite$year <- as.numeric(n_bysite$year)
 
-  # first, create the base map data frame
-  all_state <- "usa"
 
-  usa <- map_data("state", interior=TRUE)
-  base_map <- ggplot2::ggplot(data = usa, mapping = aes(x = long, 
-                                               y = lat, 
-                                               group = group)) +
-    ggplot2::geom_polygon(color = "black", fill = "white") +
-    ggplot2::coord_quickmap() +
-    ggplot2::theme_void() 
+  # plot the base map and add data to it
+  base_map <- 
+    map_with_data <- usmap::plot_usmap("counties", include = "AK", labels = FALSE) +
+    ggplot2::geom_point(data = usmap::usmap_transform(n_bysite, 
+                                                      input_names = c("LongitudeMeasure", "LatitudeMeasure"), 
+                                                      output_names = c("x", "y")), 
+                        aes(x = x, 
+                            y = y),
+                        color = "black", fill = "white")
 
-  # second, plot the base map and add data to it
   map_with_data <- base_map +
-    ggplot2::geom_point(data = n_bysite, aes(x = LongitudeMeasure, 
-                                    y = LatitudeMeasure, 
-                                    color = TN_mean, 
-                                    group = year, 
-                                    frame = year)) +
+    ggplot2::geom_point(data = usmap::usmap_transform(n_bysite, 
+                                             input_names = c("LongitudeMeasure", "LatitudeMeasure"), 
+                                             output_names = c("x", "y")), 
+               aes(x = x, 
+                   y = y, 
+                   color = TN_mean, 
+                   group = year, 
+                   frame = year)) +
     gganimate::transition_time(year) +
     ggplot2::ggtitle('Year: {frame_time}', # add year to the title
-            subtitle = 'Frame {frame} of {nframes}') +
+                     subtitle = 'Frame {frame} of {nframes}') +
     ggplot2::scale_colour_manual(values = c("blue", "red", "green")) 
+
   num_years <- max(n_bysite$year)-min(n_bysite$year) + 1 
 
   # lastly, run the animation

diff --git a/man/TADABigdataRetrieval.Rd b/man/TADABigdataRetrieval.Rd