# R Programming
# Print a greeting, then add two numbers and print their sum.
# The string uses plain ASCII double quotes: typographic ("curly") quotes
# are not string delimiters in R and make the line fail to parse.
print("Hello World!")
A <- 9
B <- 4
print(A + B)
# Five countries with population figures for 1 July 2018 and 1 July 2019 and
# the change between them. All figures are stored as pre-formatted character
# strings (thousands separators, percent signs), i.e. for display only.
Country <- c(
  "China",
  "India",
  "United States",
  "Indonesia",
  "Pakistan"
)
Population_1_july_2018 <- c(
  "1,427,647,786",
  "1,352,642,280",
  "327,096,265",
  "267,670,543",
  "212,228,286"
)
Population_1_july_2019 <- c(
  "1,433,783,686",
  "1,366,417,754",
  "329,064,917",
  "270,625,568",
  "216,565,318"
)
change_in_percents <- c(
  "+0.43%",
  "+1.02%",
  "+0.60%",
  "+1.10%",
  "+2.04%"
)

# Assemble the four vectors column-wise; column names equal the vector names.
data <- data.frame(
  Country = Country,
  Population_1_july_2018 = Population_1_july_2018,
  Population_1_july_2019 = Population_1_july_2019,
  change_in_percents = change_in_percents
)
print(data)
# Load "airtravel.csv" (a relative path, so it is resolved against the
# current working directory) into a data frame and echo it to the console.
read_data <- read.csv(file = "airtravel.csv")
print(read_data)
# Load CSV files
# Import the credit dataset from its absolute path on drive D:.
sampledata <- read.csv(file = "D:/R/Sampledatasets/credit.csv")
# Open the data in the spreadsheet-style viewer, then show a summary of
# every column.
View(sampledata)
summary(sampledata)
# Get the list of all the R packages installed, e.g. with installed.packages()
# 1. Bar Plot
# Base-graphics bar plot: one vertical bar per element of sampledata$amount.
# With horiz = FALSE the bar heights (the amounts) run along the y axis, so
# the amount label belongs on `ylab`, not `xlab`.
barplot(
  sampledata$amount,
  main = "Amount Distribution on Credit",
  ylab = "Amount in Rupees",
  col = "blue",
  horiz = FALSE
)

# ggplot2 bar chart: number of rows per value of `purpose`, one fill colour
# per value. Title and x label describe the plotted column (`purpose`).
library(ggplot2)
ggplot(sampledata, aes(x = factor(purpose), fill = factor(purpose))) +
  geom_bar() +
  labs(title = "Loan Purpose Distribution", x = "Purpose", y = "Count") +
  theme_minimal()
# 2. Scatter Plot
# Scatter plot of loan amount (x) against loan duration (y), drawn with solid
# circles (pch = 19).
# The former `data(sampledata)` call was dropped: data() loads datasets
# shipped with packages, whereas `sampledata` is a data frame read from a CSV
# file, so the call could only emit a "data set not found" warning.
plot(
  sampledata$amount,
  sampledata$months_loan_duration,
  main = "Scatterplot Example",
  xlab = "Amount",
  ylab = " Month of loan duration ",
  pch = 19
)
# 3. Pie Chart
# Pie chart of a monthly budget: one slice per spending category.
expenditure <- c(600, 300, 150, 100, 200)
# Slice labels, in the same order as `expenditure` ("Cloths" typo corrected).
expenditure_labels <- c("Housing", "Food", "Clothes", "Entertainment", "Other")
# pie() is called for its drawing side effect only; it returns NULL
# invisibly, so its value is not stored.
pie(
  expenditure,
  main = "Monthly Expenditure Breakdown",
  labels = expenditure_labels,
  col = c("red", "orange", "yellow", "blue", "green")
)
# 4. Box Plot
# Box plot of mpg for each value of `am`, one box (and fill colour) per value.
# `cars_data` is defined here because this plot runs before the regression
# section, which is where the script otherwise first assigns it (as mtcars).
cars_data <- mtcars
ggplot(cars_data, aes(group = am, y = mpg, fill = as.factor(am))) +
  geom_boxplot()
# Distributions
# 1. Normal Distribution
# Evaluate a normal density over an evenly spaced grid and save the resulting
# plot as "dnormExample.png".

# Grid from -15 to 15 in steps of 0.1.
x <- seq(from = -15, to = 15, by = 0.1)
# Density parameterised by the grid's own mean and standard deviation.
y <- dnorm(x, mean = mean(x), sd = sd(x))

# Everything drawn between png() and dev.off() goes into the PNG file.
png(filename = "dnormExample.png")
plot(x, y)
dev.off()
# 2. Binomial Distribution
# Cumulative probability P(X <= 3) for 13 trials with success probability 1/6.
pbinom(q = 3, size = 13, prob = 1 / 6)

# Cumulative distribution over 0..10 successes for 10 trials with success
# probability 1/6, drawn as a line.
plot(
  x = 0:10,
  y = pbinom(0:10, size = 10, prob = 1 / 6),
  type = "l"
)
# 4. Correlation
# Four views of the correlations in `my_data`. cor() is applied to the whole
# data frame, so every column must be numeric; mpg, hp and wt must exist.
# NOTE(review): `my_data` is not defined anywhere in the visible script. The
# column names match the built-in mtcars dataset, so it is used as a fallback
# when no `my_data` exists -- confirm this is the intended data.
if (!exists("my_data")) {
  my_data <- mtcars
}

# 1. Correlation matrix, rounded to 2 decimals.
round(cor(my_data), digits = 2)

# 2. Scatterplot matrix of three selected variables.
pairs(my_data[, c("mpg", "hp", "wt")])

# 3. Correlation matrix drawn as numbers, upper triangle only.
library(corrplot)
corrplot(
  cor(my_data),
  method = "number",
  type = "upper"
)

# 4. Pairwise plot matrix of the same three variables via GGally.
library(GGally)
ggpairs(my_data[, c("mpg", "hp", "wt")])
# Grouping data for per-group summaries
# Build a small student table with 4 columns. It was previously constructed
# twice with identical contents; a single definition is enough.
data <- data.frame(
  name = c("ojaswi", "bobby", "rohith", "gnanesh", "sireesha"),
  subjects = c("java", "java", "python", "cpp", "python"),
  age = c(21, 23, 21, 20, 19),
  id = c(1, 2, 3, 4, 5)
)
# Display the table.
data
# Summary statistics of age, computed separately for each subject.
tapply(data$age, data$subjects, summary)
# 1. One-sample t-test
# One-sample t-test: is the mean of the sample different from a hypothesised
# population mean?

# Observed sample.
data <- c(10, 15, 20, 25, 30, 35)

# Mean assumed under the null hypothesis.
null_hypothesis_mean <- 20

# Run the test and show the full report.
result <- t.test(x = data, mu = null_hypothesis_mean)
print(result)
# 2. Two-sample t-test
# Two-sample t-test comparing the mean exam score of two groups.

# Exam scores, one vector per group.
group1 <- c(75, 82, 68, 90, 79)
group2 <- c(85, 71, 88, 77, 80)

# Run the test on both samples and show the full report.
result <- t.test(x = group1, y = group2)
print(result)
# 1. Time series plot
# Weekly COVID-19 positive case counts from 22 January 2020 to 15 April 2020,
# plotted as a time series and saved to "timeSeries.png".
x <- c(
  580, 7813, 28266, 59287, 75700, 87820, 95314,
  126214, 218843, 471497, 936851, 1508725, 2072113
)

# lubridate supplies ymd() and decimal_date().
library(lubridate)

# Everything drawn between png() and dev.off() goes into the PNG file.
png(filename = "timeSeries.png")

# Weekly series starting on 22 January 2020; 365.25 / 7 is the number of
# observations per year.
mts <- ts(
  data = x,
  start = decimal_date(ymd("2020-01-22")),
  frequency = 365.25 / 7
)

plot(
  mts,
  xlab = "Weekly Data",
  ylab = "Total Positive Cases",
  main = "COVID-19 Pandemic",
  col.main = "darkgreen"
)

# Close the device, which writes the file.
dev.off()
# 2. Time series plot with positive cases and deaths
# Weekly COVID-19 positive cases and weekly deaths from 22 January 2020 to
# 15 April 2020, plotted together as a two-column time series and saved to
# "multivariateTimeSeries.png".
positiveCases <- c(
  580, 7813, 28266, 59287, 75700, 87820, 95314,
  126214, 218843, 471497, 936851, 1508725, 2072113
)
deaths <- c(
  17, 270, 565, 1261, 2126, 2800, 3285,
  4628, 8951, 21283, 47210, 88480, 138475
)

# lubridate supplies ymd() and decimal_date().
library(lubridate)

# Everything drawn between png() and dev.off() goes into the PNG file.
png(filename = "multivariateTimeSeries.png")

# Bind both vectors column-wise into one weekly series starting on
# 22 January 2020; 365.25 / 7 is the number of observations per year.
mts <- ts(
  data = cbind(positiveCases, deaths),
  start = decimal_date(ymd("2020-01-22")),
  frequency = 365.25 / 7
)

plot(
  mts,
  xlab = "Weekly Data",
  main = "COVID-19 Cases",
  col.main = "darkgreen"
)

# Close the device, which writes the file.
dev.off()
# 5. Regression Analysis
# Simple linear regression of fuel economy (mpg) on weight (wt) using the
# built-in mtcars dataset.
cars_data <- mtcars
head(cars_data)

# Quick look at the response variable: index plot and histogram.
plot(cars_data$mpg)
hist(cars_data$mpg)

# Fit mpg as a linear function of wt. The model is given as a formula object
# rather than a character string, so the stored call and printed summary show
# a real formula instead of a quoted string.
cars_lm <- lm(mpg ~ wt, data = cars_data)
summary(cars_lm)