Thanks to visit codestin.com
Credit goes to www.scribd.com

0% found this document useful (0 votes)
17 views4 pages

DataQuest - Project

Uploaded by

hnaila868
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
17 views4 pages

DataQuest - Project

Uploaded by

hnaila868
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 4

DataQuest Project

Garakishi Guluzade

2024-09-24

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF,
and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the
output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)

## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00

Including Plots

You can also embed plots, for example:

1
800
600
pressure

400
200
0

0 50 100 150 200 250 300 350

temperature

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that
generated the plot.
This is a project for the DataQuest course. We will try to answer the question “Which countries have reported
the highest number of positive cases in relation to the number of tests conducted?” for the COVID-19 pandemic.
library(tidyverse) library(readr) library(dslabs) install.packages(“tinytex”) tinytex::install_tinytex()
covid19 <- read_csv(“covid19.csv”)
dim(covid19)
vector_cols <- colnames(covid19) vector_cols
head(covid19) glimpse(covid19) view(covid19)
covid_df_all_states <- covid19 %>% filter(Province_State == “All States”) %>% select(-Province_State)
covid_df_all_states
covid_df_all_states_daily <- covid19 %>% select(Date, Country_Region, active, hospitalizedCurr,
daily_tested, daily_positive)
covid_df_all_states_daily
library(dplyr)

Summarizing the data by grouping by ‘Country_Region’


covid_df_all_states_daily_sum <- covid_df_all_states_daily %>%
  group_by(Country_Region) %>% # Group rows by the ‘Country_Region’ column
  summarize(
    tested = sum(daily_tested, na.rm = TRUE), # Sum of ‘daily_tested’
    positive = sum(daily_positive, na.rm = TRUE), # Sum of ‘daily_positive’
    active = sum(active, na.rm = TRUE), # Sum of ‘active’
    hospitalized = sum(hospitalizedCurr, na.rm = TRUE) # Sum of ‘hospitalizedCurr’
  ) %>%
  arrange(desc(tested)) # Arrange the result in descending order by the ‘tested’ column

Display the result


print(covid_df_all_states_daily_sum)
covid_top_10 <- head(covid_df_all_states_daily_sum, 10) covid_top_10
countries <- covid_top_10$Country_Region countries tested_cases <- covid_top_10$tested tested_cases
positive_cases <- covid_top_10$positive positive_cases active_cases <- covid_top_10$active active_cases
hospitalized_cases <- covid_top_10$hospitalized hospitalized_cases

Assign country names to each vector


names(tested_cases) <- countries names(positive_cases) <- countries names(active_cases) <- countries
names(hospitalized_cases) <- countries

Display the named vectors


print(tested_cases) print(positive_cases) print(active_cases) print(hospitalized_cases)
positive_tested_ratio <- positive_cases / tested_cases positive_tested_ratio
top_3_indexes <- order(positive_tested_ratio, decreasing = T)[1:3]
positive_tested_top_3 <- positive_tested_ratio[top_3_indexes] positive_tested_top_3
#Keeping relevant information united_kingdom <- c(0.11, 1473672, 166909, 0, 0) united_states <- c(0.10,
17282363, 1877179, 0, 0) turkey <- c(0.08, 2031192, 163941, 2980960, 0)
covid_mat <- rbind(united_kingdom, united_states, turkey) covid_mat colnames(covid_mat) <-
c(“Ratio”, “tested”, “positive”, “active”, “hospitalized”) covid_mat
#Putting all together question <- “Which countries have had the highest number of positive cases
against the number of tests?” answer <- c(“Positive tested cases” = positive_tested_top_3) answer
data_list <- list(question = question, answer = answer) data_list # Assuming your dataframes are
named covid_df_all_states_daily and covid_top_10 dataframes_list <- list(covid_df_all_states_daily
= covid_df_all_states_daily, covid_top_10 = covid_top_10)

Display the list of dataframes


print(dataframes_list) # Assuming you have created matrix1 and matrix2 matrices_list <- list(covid_mat
= covid_mat)

Display the matrices list


print(matrices_list)

3
Create a list that contains the vectors
vectors_list <- list( tested_cases = tested_cases, positive_cases = positive_cases, active_cases = active_cases,
hospitalized_cases = hospitalized_cases, countries = countries )

Create the combined named list


data_structure_list <- list( dataframes = dataframes_list, # From the previous steps
matrices = matrices_list, vectors = vectors_list )

Display the final named list


print(data_structure_list)

Display the vectors list


print(vectors_list)
covid_analysis_list <- list(question = question, answer = answer, data_structure_list = data_structure_list)
covid_analysis_list covid_analysis_list[[2]]

You might also like