Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ Authors@R:
role = "aut"),
person(given = "Laura",
family = "Shumway",
role = "aut"),
person(given = "Elise",
family = "Hinman",
role = "aut"),
person(given = "Kathleen",
family = "Healy",
role = "aut")
)
Description: Assists data partners in performing automated assessments with
Expand All @@ -40,13 +46,15 @@ Imports:
RColorBrewer,
stats,
gganimate,
gifski,
lubridate,
maps
maps,
maptools,
usmap
Depends:
R (>= 3.5.0)
Suggests:
readr,
gifski,
rlang,
remotes,
tidyverse,
Expand Down
162 changes: 78 additions & 84 deletions R/DataDiscoveryRetrieval.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@
#' to the "ResultMeasureValue" and "DetectionLimitMeasureValue" columns;
#' and to provide information about the result values that is needed to address
#' censored data later on (i.e., nondetections)
#'
#'
#' Users can reference the \href{https://www.epa.gov/waterdata/storage-and-retrieval-and-water-quality-exchange-domain-services-and-downloads}{WQX domain tables}
#' to find allowable vales for queries, e.g., reference the WQX domain table to find countycode and statecode: https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip
#'
#' See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information.
#'
#' @param statecode Code that identifies a state
Expand All @@ -46,12 +49,28 @@
#'
#' @export
#'
#' @examples
#' @examples
#' \dontrun{
#'
#' tada1 <- TADAdataRetrieval(statecode = "WI",
#' countycode = "Dane",
#' characteristicName = "Phosphorus")
#' countycode = "Dane",
#' characteristicName = "Phosphorus")
#'
#' tada2 <- TADAdataRetrieval(ProjectIdentifier = "Anchorage Bacteria 20-21")
#'
#' tada3 <- TADAdataRetrieval(statecode = "UT",
#' characteristicName = c("Ammonia", "Nitrate", "Nitrogen"),
#' startDate = "10-01-2020")
#'

#' test4 <- TADAdataRetrieval(statecode = "SC", countycode = "Abbeville")
#'
#' # countycode queries require a statecode
#' tada5 <- TADAdataRetrieval(countycode = "US:02:020")
#'
#' }
#'

TADAdataRetrieval <- function(statecode = "null",
startDate = "null",
countycode = "null",
Expand Down Expand Up @@ -245,9 +264,14 @@ TADAReadWQPWebServices <- function(webservice) {
#' "TADA.DetectionLimitMeasureValue.Flag" are created to track and changes made
#' to the "ResultMeasureValue" and "DetectionLimitMeasureValue" columns;
#' and to provide information about the result values that is needed to address
#' censored data later on (i.e., nondetections)
#'
#' censored data later on (i.e., nondetections)
#'
#' Some code for this function was adapted from this USGS Blog (Author: Aliesha Krall)
#' \href{https://waterdata.usgs.gov/blog/large_sample_pull/}{Large Sample Pull}
#'
#' See ?MeasureValueSpecialCharacters and ?autoclean documentation for more information.
#'
#'
#'
#' @param startDate Start Date YYYY-MM-DD format, for example, "1995-01-01"
#' @param endDate end date in YYYY-MM-DD format, for example, "2020-12-31"
Expand All @@ -264,62 +288,76 @@ TADAReadWQPWebServices <- function(webservice) {
#' tada2 <- TADABigdataRetrieval(startDate = "2019-01-01", endDate = "2021-12-31", characteristicName = "Temperature, water", siteType = "Stream")
#'
#' tada3 <- TADABigdataRetrieval(characteristicName = "Phosphorus")
#'
#' tada3 <- TADABigdataRetrieval(statecode = "CT")
#'
#' }
#'


TADABigdataRetrieval <- function(startDate = "null",
endDate = "null",
statecode = character(0),
statecode = "null",
characteristicName = "null",
siteType = "null"
) {

if(!startDate=="null"){
startDate_Low = lubridate::ymd(startDate)
startYearLo = lubridate::year(startDate_Low)
}else{ # else: pick a date before which any data are unlikely to be in WQP
startDate = "1800-01-01"
startDate_Low = lubridate::ymd(startDate)
startYearLo = lubridate::year(startDate_Low)
}

startDate_Low = lubridate::ymd(startDate)
startYearLo = lubridate::year(startDate_Low)

endDate_High = lubridate::ymd(endDate)
startYearHi = lubridate::year(endDate_High)
# Logic: if the input endDate is not null, convert to date and obtain year
# for summary
if(!endDate=="null"){
endDate_High = lubridate::ymd(endDate)
endYearHi = lubridate::year(endDate_High)
}else{ # Else, if not populated, default to using today's date/year for summary
endDate = Sys.Date()
endDate_High = lubridate::ymd(endDate)
endYearHi = lubridate::year(endDate_High)
}

# Create WQPsummary query
WQPquery <- list()
if (length(characteristicName)>1) {
characteristicName = list(characteristicName)
WQPquery = c(WQPquery,characteristicName = list(characteristicName))
} else if (characteristicName != "null") {
characteristicName = characteristicName
WQPquery = c(WQPquery,characteristicName = characteristicName)
}

if (length(siteType)>1) {
siteType = list(siteType)
WQPquery = c(WQPquery,siteType = list(siteType))
} else if (siteType != "null") {
siteType = siteType
WQPquery = c(WQPquery,siteType = siteType)
}

state_cd_cont = utils::read.csv(file = "inst/extdata/statecode.csv")

if(length(statecode)>0){

if (!statecode=="null") {
state_cd_cont = utils::read.csv(file = "inst/extdata/statecode.csv")
statecode = as.character(statecode)
state_cd_cont = state_cd_cont%>%filter(STUSAB%in%statecode)
statecd = state_cd_cont$STATE
if(nrow(state_cd_cont)==0){stop("State code is not valid. Check FIPS state/territory abbreviations.")}
if(length(statecode)>1){
WQPquery = c(WQPquery, statecode=list(statecd))
}else{WQPquery = c(WQPquery, statecode=statecd)}
}

for(i in seq_len(nrow(state_cd_cont))){

state_cd = as.numeric(state_cd_cont$STATE[i])
state_nm = state_cd_cont$STUSAB[i]
df_summary = dataRetrieval::readWQPsummary(WQPquery)

## NOTE: if query brings back no results, function returns empty
# dataRetrieval profile, not empty summary
df_summary = dataRetrieval::readWQPsummary(statecode = state_cd,
characteristicName = characteristicName,
siteType = siteType)
if(nrow(df_summary)>0){
sites = df_summary %>%
dplyr::filter(YearSummarized >= startYearLo,
YearSummarized <= startYearHi)
YearSummarized <= endYearHi)

siteid_all = unique(sites$MonitoringLocationIdentifier)
rm(df_summary) # save some space

if(length(siteid_all) > 0) {
#print(paste0("Grabbing ",state_nm," data from ",length(siteid_all)," sites."))
rm(sites) # save some space
l=length(siteid_all) #len(sites)
maxsites=100 #max number of sites pulled per WQP query
#may want to consider using the total number of records in a given
Expand Down Expand Up @@ -371,60 +409,16 @@ TADABigdataRetrieval <- function(startDate = "null",
df = dplyr::bind_rows(df, joins)
}
}else{
joins = data.frame()
# print(paste0(state_nm, " returned no data."))
}

if(nrow(df) > 0){

#####
#need to edit below if temporary rds files do not go away
#may be able to delete below
#https://stackoverflow.com/questions/47626331/saving-and-retrieving-temp-files-in-r-packages
#####

#original
#saveRDS(df_state, file = paste0(state_nm, "_raw_data.rds"))

tempfilename = paste0(state_nm, "_raw_data.rds")
file.path(tempdir(), saveRDS(df, file = paste0("inst/tempdata/", tempfilename)))

}
} #else{print(paste0(state_nm, " had no data."))}
}
all_data <- data.frame()
stdir = list.files("inst/tempdata/")

for(k in 1:length(stdir)){
path = paste0("inst/tempdata/",stdir[k])
allstates_df <- tryCatch({
#####
#need to edit line below if rds files do not go away
#####

#original below
#readRDS(paste0(state, "_raw_data.rds"))

readRDS(path)
})
unlink(path)

if(nrow(allstates_df) > 0){
allstates_df$ResultMeasureValue = as.character(allstates_df$ResultMeasureValue)
allstates_df$HorizontalAccuracyMeasure.MeasureValue = as.character(allstates_df$HorizontalAccuracyMeasure.MeasureValue)
allstates_df$ActivityDepthHeightMeasure.MeasureValue = as.character(allstates_df$ActivityDepthHeightMeasure.MeasureValue)
allstates_df$DetectionQuantitationLimitMeasure.MeasureValue = as.character(allstates_df$DetectionQuantitationLimitMeasure.MeasureValue)
all_data <- dplyr::bind_rows(all_data, allstates_df)
warning("Query returned no data. Function returns an empty dataframe.")
return(sites)
}
}else{
warning("Query returned no data. Function returns an empty dataframe.")
return(df_summary)
}

}

# # Do not need if date input works in dataRetrieval functions
# finalprofile = all_data %>%
# dplyr::filter(ActivityStartDate <= endDate,
# ActivityStartDate >= startDate)

finalprofile = autoclean(all_data)
finalprofile = autoclean(df)
#not sure if above is working correctly, thousands of "duplicated" rows are removed
# you will still need to filter on activity media subdivision now

Expand Down Expand Up @@ -517,4 +511,4 @@ JoinWQPProfiles <- function(FullPhysChem = "null",

}else{join3 <- join2}
return(join3)
}
}
2 changes: 1 addition & 1 deletion R/ResultFlagsIndependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -1043,7 +1043,7 @@ InvalidCoordinates <- function(.data,

# if clean_outsideUSA is "change sign", change the sign of lat/long coordinates outside of USA
if (clean_outsideUSA == "change sign") {
print("Note: This is a temporary solution. Data owner should fix the raw data to address invalid coordinates through WQX. For assistance, email the WQX helpdesk ([email protected]).")
print("Note: When clean_outsideUSA == change sign, the sign of lat/long coordinates flagged as outside of USA are switched. This is a temporary solution. Data owners should fix the raw data to address invalid coordinates through WQX. For assistance fixing data errors you see in the WQP, email the WQX helpdesk ([email protected]).")
.data <- .data %>%
dplyr::mutate(
LatitudeMeasure = dplyr::case_when(
Expand Down
46 changes: 25 additions & 21 deletions R/Visualizations.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
CreateAnimatedMap <- function(.data) {

# code adapted from USGS blog: https://waterdata.usgs.gov/blog/large_sample_pull/
# reference: https://cran.r-project.org/web/packages/usmap/vignettes/advanced-mapping.html

# pull the year from the date
.data$year <- base::format(as.Date(.data$ActivityStartDate, format="%Y-%m-%d"),"%Y")
Expand All @@ -22,42 +23,45 @@ CreateAnimatedMap <- function(.data) {
.data %>%
dplyr::group_by(MonitoringLocationIdentifier, LatitudeMeasure, LongitudeMeasure, year) %>%
dplyr::summarize(mean = mean(.data$ResultMeasureValue, na.rm = TRUE),
median = stats::median(.data$ResultMeasureValue, na.rm = TRUE))
median = stats::median(.data$ResultMeasureValue, na.rm = TRUE))

# create a new character column with total nitrogen acceptable range designations
n_bysite <-
n_bysite %>% dplyr::mutate(TN_mean=
dplyr::case_when(mean<2 ~ "<2 mg/l",
mean>=2 & mean<=6 ~ "2-6 mg/l",
mean>6 ~ ">6 mg/l"))

dplyr::case_when(mean<2 ~ "<2 mg/l",
mean>=2 & mean<=6 ~ "2-6 mg/l",
mean>6 ~ ">6 mg/l"))
# convert latitude, longitude, and year data to numeric form
n_bysite$LatitudeMeasure <- as.numeric(n_bysite$LatitudeMeasure)
n_bysite$LongitudeMeasure <- as.numeric(n_bysite$LongitudeMeasure)
n_bysite$year <- as.numeric(n_bysite$year)

# first, create the base map data frame
all_state <- "usa"

usa <- map_data("state", interior=TRUE)
base_map <- ggplot2::ggplot(data = usa, mapping = aes(x = long,
y = lat,
group = group)) +
ggplot2::geom_polygon(color = "black", fill = "white") +
ggplot2::coord_quickmap() +
ggplot2::theme_void()
# plot the base map and add data to it
base_map <-
map_with_data <- usmap::plot_usmap("counties", include = "AK", labels = FALSE) +
ggplot2::geom_point(data = usmap::usmap_transform(n_bysite,
input_names = c("LongitudeMeasure", "LatitudeMeasure"),
output_names = c("x", "y")),
aes(x = x,
y = y),
color = "black", fill = "white")

# second, plot the base map and add data to it
map_with_data <- base_map +
ggplot2::geom_point(data = n_bysite, aes(x = LongitudeMeasure,
y = LatitudeMeasure,
color = TN_mean,
group = year,
frame = year)) +
ggplot2::geom_point(data = usmap::usmap_transform(n_bysite,
input_names = c("LongitudeMeasure", "LatitudeMeasure"),
output_names = c("x", "y")),
aes(x = x,
y = y,
color = TN_mean,
group = year,
frame = year)) +
gganimate::transition_time(year) +
ggplot2::ggtitle('Year: {frame_time}', # add year to the title
subtitle = 'Frame {frame} of {nframes}') +
subtitle = 'Frame {frame} of {nframes}') +
ggplot2::scale_colour_manual(values = c("blue", "red", "green"))

num_years <- max(n_bysite$year)-min(n_bysite$year) + 1

# lastly, run the animation
Expand Down
6 changes: 4 additions & 2 deletions man/TADABigdataRetrieval.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading