0% found this document useful (0 votes)

25 views9 pages

R Code

The document provides a comprehensive guide on data visualization techniques using R, including importing data, exploring quantitative and qualitative data, and creating various plots such as histograms, scatterplots, and bar charts. It also covers descriptive statistics, data subsetting, and Poisson distribution analysis. Additionally, it includes exercises and examples for practical application of the concepts discussed.

Uploaded by

harrypoter

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

25 views9 pages

R Code

Uploaded by

harrypoter

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 9

########################################################################

# Data Visualization - 01
########################################################################

rm(list=ls())

# Importing data from a CSV file
# Dataset source:
# https://www.kaggle.com/datasets/spscientist/students-performance-in-exams?resource=download

help("read.csv")

# Insert the address of the "Data.csv" file here.
# The path must be one unbroken string: a line break inside the quotes
# becomes part of the file name, and read.csv() then cannot find the file.
#data_path <- "C:/Users/DIM/Downloads/Data.csv"
data_path <- "F:/Msc(Kealniya)/1st Semi/Statistics_for_Data_Science _MDAN 51163/Lecturer 01/Data.csv"

# Read the file once and keep the result in `data`.
# (`data = read.csv(...)` would assign too, but `<-` is the conventional
# assignment operator in R. Calling read.csv() without assigning only
# prints the table and throws it away.)
data <- read.csv(data_path)

####### Data exploration

# First rows of the data set
head(data)

# Data structure (column names, types and a preview of the values)
str(data)

########################################################################
# Quantitative data
########################################################################

data$math_score

# Plot the histogram of math_score

help("hist")
hist(data$math_score)
hist(data$reading_score)

hist(data$math_score,
     main = "Histogram of Math Score",
     xlab = "Math Score", ylab = "Frequency")

# Frequency polygon of the math_score

#install.packages("ggplot2")
library(ggplot2)

help(ggplot)

# Inside aes() refer to the column by its bare name. ggplot2 looks it up in
# the data frame passed as `data`; writing data$math_score bypasses that
# lookup and is discouraged by ggplot2.
ggplot(data, aes(math_score)) +
  geom_freqpoly(bins = 10)

# Density plot of math_score
density(data$math_score)

plot(density(data$math_score), main = "Density of Math Score",
     xlab = "Math Score", ylab = "Density")

# Scatterplot of math_score vs writing scores

plot(data$math_score, data$writing_score,
     main = "Scatterplot",
     xlab = "Math score", ylab = "Writing score", pch = 19)

# Line plot of the same two variables (type = "o" overplots points and the
# lines joining them, in row order). The axis labels name the plotted
# variables.
plot(data$math_score, data$writing_score, type = "o", main = "Line plot",
     xlab = "Math score", ylab = "Writing score", pch = 19)

help(grid)

########################################################################
# Qualitative data
########################################################################

# Extract group data
Race_Group <- data$race

# Frequency table of group data
Group <- table(Race_Group)

# Pie chart
help(pie)
pie(Group, main = "Pie Chart")

# Basic barplot straight from the frequency table
help("ggplot")
# ggplot() needs a data frame, not a table object. as.data.frame() turns the
# one-way table into the columns Race_Group (level) and Freq (count), and a
# geom layer is required for anything to be drawn.
ggplot(data = as.data.frame(Group), aes(x = Race_Group, y = Freq)) +
  geom_bar(stat = "identity")

# Constructing a data frame by hand

help("data.frame")

table(Race_Group)

group_label <- c("A", "B", "C", "D", "E")   # Group names

count <- c(89, 190, 319, 262, 140)          # count of each group, typed in manually

group_count <- data.frame(group_label, count)

# Basic barplot
ggplot(data = group_count, aes(x = group_label, y = count)) +
  geom_bar(stat = "identity")

help("geom_bar")

# Pareto Chart

#install.packages("qcc")
library(qcc)

# Name the counts explicitly so every bar carries its own group label
# instead of relying on labels generated for an unnamed vector.
pareto.chart(setNames(count, group_label))

# Gender-by-race cross-tabulation

# Keep only the two categorical columns of interest
x <- data[, c("gender", "race")]

# Two-way frequency table (gender in rows, race in columns)
table(x)

# Long-format data frame entered by hand: one row per (group, gender) pair
grp <- rep(c("A", "B", "C", "D", "E"), times = 2)
gen <- rep(c("F", "M"), each = 5)
fre <- c(36, 104, 180, 129, 69, 53, 86, 139, 133, 71)

df <- data.frame(grp, gen, fre)

# Multiple barplot

# Bars stacked by gender within each group
ggplot(data = df, aes(x = grp, y = fre, fill = gen)) +
  geom_col()

# Bars placed side by side with position_dodge()
ggplot(data = df, aes(x = grp, y = fre, fill = gen)) +
  geom_col(position = position_dodge())

# Sorting data based on the ascending order of math_score
data_sorted <- data[order(data$math_score), ]

# Sorting data based on the descending order of math_score
data_sorted_descending <- data[order(data$math_score, decreasing = TRUE), ]

# Backward-compatible alias: this script used to store the DESCENDING result
# under the misleading name below. Prefer data_sorted_descending.
data_sorted_ascending <- data_sorted_descending

#########################################################################
######

# Visualizing multivariate data:

# "women" dataset is available in R:
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/women.html
help(women)

# Keep the built-in data set under its own name. Assigning it to `data`
# would overwrite the students data frame that the later "Subsetting data"
# section still needs (gender, math_score, ... columns).
women_data <- women

h <- women_data$height
w <- women_data$weight

# Scatterplot
plot(women_data, xlab = "Height (in)", ylab = "Weight (lb)",
     main = "women data: American women aged 30-39")
grid(nx = 10, ny = 10)

# Line plot
plot(h, w, type = "o", main = "Line plot",
     xlab = "Height", ylab = "weight", pch = 19)
help(grid)
#########################################################################
######

## Group Activity: Perform a descriptive analysis for your dataset
## and interpret your results.

# Group/Room  Dataset     Data Description
# 1           airquality  https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/airquality.html
# 2           attenu      https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/attenu.html
# 3           freeny      https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/freeny.html
# 4           iris        https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html
# 5           quakes      https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/quakes.html
# 6           rock        https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/rock.html
# 7           stackloss   https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/stackloss.html
# 8           swiss       https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/swiss.html

# Check the datasets available in R:
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html

#########################################################################
######

## Subsetting data

# NOTE: this section expects `data` to be the students data frame read from
# Data.csv (columns such as gender, math_score, reading_score,
# writing_score).

# Extract data between entry 30 and entry 55 for the first 3 variables
data_subset1 <- data[30:55, 1:3]
# Extract data between entry 30 and entry 55 for the first, second, and
# fifth variables
data_subset1_1 <- data[30:55, c(1, 2, 5)]

# Extract data of the male students:

help("subset")
data_subset2 <- subset(data, gender == "male")

# Extract data of the male students who scored more than 75 for
# mathematics:
data_subset3 <- subset(data, gender == "male" & math_score > 75)

# Extract data of the male students or students who scored more than 75
# for mathematics:
data_subset3_1 <- subset(data, gender == "male" | math_score > 75)
# scenario 1: gender == "male"
# scenario 2: math_score > 75
# scenario 3: gender == "male" and math_score > 75

# Extract scores of the male students who scored more than 75 for
# mathematics:
data_subset4 <- subset(data, gender == "male" & math_score > 75,
                       select = c(math_score, reading_score,
                                  writing_score))

# Alternative method: start from data_subset3 (computed above) and keep
# only the score columns.
#data_subset4_1 <- subset(data_subset3, select = c(math_score,
#                         reading_score, writing_score))
# Select the columns by name rather than by position (2:4), so the result
# does not depend on the column order of the CSV file.
score_columns <- c("math_score", "reading_score", "writing_score")
data_subset4_2 <- data_subset3[, score_columns]

# To get a random sample from the whole data set

set.seed(123) # To generate the same random sample
# Randomly select about 40% of the data as a sample: every row is kept
# independently with probability 0.4, so the sample size varies around 40%.
# The object is `data` (lower case); R is case-sensitive and `DATA` does not
# exist at this point of the script.
sample_40 <- sample(c(TRUE, FALSE), nrow(data), replace = TRUE,
                    prob = c(0.4, 0.6))
SAMPLE <- data[sample_40, ]

########################################################################
# Descriptive Statistics 02
########################################################################

# Univariate sample used throughout this section
data <- c(
  4, 10, 5, 8, 7.5, 8, 5, 16.5, 1, 7.8,
  8, 10, 11, 18, 15, 9, 14, 23, 21, 28
)

# Mean: print it, then keep it
help(mean)
mean(data)
Mean_data <- mean(data)

# Median: print it, then keep it under both names used in this script
help(median)
median(data)
med <- median(data)
MED <- med

# Mode
# Read the most frequent value off the frequency table
table(data)

# Range
max(data) # Maximum
min(data) # Minimum
# Range = maximum - minimum
Range <- diff(range(data))

# Standard deviation
help(sd)
sd(data)
# Variance
var(data)

# Coefficient of Variation
# Coefficient of Variation = std dev / mean * 100

cv <- sd(data) / mean(data) * 100

# Inter Quartile Range

help("quantile")
Q1 <- quantile(data, probs = 0.25)

quantile(data, probs = c(0.25, 0.75))

IQR(data)

# Summary statistics (minimum, quartiles, median, mean, maximum)
summary(data)

# Boxplot
help("boxplot")
boxplot(data)

# Histogram (the sample is re-entered so this part can be run on its own)
data <- c(
  4, 10, 5, 8, 7.5, 8, 5, 16.5, 1, 7.8,
  8, 10, 11, 18, 15, 9, 14, 23, 21, 28
)
hist(data)
help(hist)

# Density plot: print the kernel density estimate, then draw it
density(data)
plot(density(data))

# Shape of the distribution: skewness and kurtosis
# (both functions come from the "moments" package)

#install.packages("moments")
library(moments)

help("skewness")
skewness(data, na.rm = FALSE)
kurtosis(data, na.rm = FALSE)

## Averaging a vector that contains a missing value

# Sample vector; the last entry is missing (NA)
x <- c(12, 7, 3, 4.2, 18, 2, 54, -21, 8, -5, NA)

# Mean with the missing entry left in: NA propagates, so the result is NA

help(mean)
mean_WithMissing <- mean(x)
print(mean_WithMissing)

# Mean after dropping the missing entry (na.rm = TRUE removes NA values)

mean_WithOutMissing <- mean(x, na.rm = TRUE)
print(mean_WithOutMissing)

#########################################################################
######
## Perform a descriptive analysis for the "iris" dataset and interpret
## your results.

# iris:
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html

help(iris)

# Working copy of the built-in iris data set for the exercise
DATA <- iris

########################################################################
# Poisson Distribution 04

# Content: Density, distribution function, and random generation

# for the Poisson distribution with parameter lambda(=mu).

# Density function: dpois(x, lambda, log = FALSE)

# Distribution function (cumulative probability):
#   ppois(q, lambda, lower.tail = TRUE, log.p = FALSE)
# Random Generation: rpois(n, lambda)

########################################################################

rm(list=ls()) # Clear the Environment

# Example:

# RDA investigated that there are twelve cars crossing a bridge per
# minute on average.

#(a) Find the probability of having

# (i) no cars
# (ii) three or more cars
# (iii) less than 17 cars
# crossing the bridge in a particular minute.

#(b) Plot the probability distribution of No of cars crossing the bridge.

# X = The number of cars crossing the bridge in a particular minute

# x = 0, 1, 2, 3,.....

# Poisson distribution with parameter lambda(=mu).

# X follows a Poisson(lambda = 12) distribution

help(dpois)

#(i) no cars: X=0

x <- 0
# (i) P(X = 0)
P_X_0 <- dpois(x = 0, lambda = 12)
P_X_0

sprintf("P(X = 0) = %s", round(P_X_0, digits = 6))

sprintf("The probability of no cars crossing the bridge in a minute is %s", round(P_X_0, digits = 6))

#(ii) three or more cars: X >= 3
# (ii) P(X >= 3) = 1 - P(X <= 2)
P_X_geq_3 <- 1 - ppois(2, lambda = 12) # lower tail
P_X_geq_3

# P(X >= 3) via the upper tail. With lower.tail = FALSE, ppois(q, ...)
# returns P(X > q), so q must be 2: P(X > 2) = P(X >= 3). Using q = 3 would
# give P(X >= 4). The argument is spelled out in full (lower.tail) instead
# of relying on partial matching of `lower`.
P_X_GEQ_3 <- ppois(2, lambda = 12, lower.tail = FALSE) # upper tail
P_X_GEQ_3
sprintf("P(X >= 3) = %s", round(P_X_GEQ_3, digits = 4))

#(iii) less than 17 cars: X < 17

# (iii) P(X < 17) = P(X <= 16), because X only takes whole-number values
P_X_lt_17 <- ppois(16, lambda = 12) # lower tail
P_X_lt_17
sprintf("P(X < 17) = %s", round(P_X_lt_17, digits = 4))

# P(X <= 17) is kept for reference. It also counts X = 17, so it is NOT the
# answer to "less than 17 cars".
P_X_leq_17 <- ppois(17, lambda = 12) # lower tail
P_X_leq_17

# (b) The Poisson probability distribution plot

x <- 0:20

# Stack the two plots in one column. par() returns the previous settings,
# which are restored once both plots are drawn so later plots are not
# forced into the 2 x 1 layout.
old_par <- par(mfrow = c(2, 1))

# Probability function P(X = x), labelled "pdf" in the plot

# Axis labels are single-line strings: a line break inside the quotes would
# be drawn as part of the label.
barplot(dpois(x, lambda = 12), col = "red", names.arg = x,
        xlab = "X = No of cars crossing the bridge", ylab = "pdf: P(X = x)",
        main = "Poisson (mu = 12) pdf")

# Cumulative distribution function (cdf), P(X <= x)

barplot(ppois(x, lambda = 12), col = "red", names.arg = x,
        xlab = "X = No of cars crossing the bridge", ylab = "cdf:P(X <= x)",
        main = "Poisson (mu = 12) cdf")

par(old_par)

########################################################################
# Extra: Random generation for a Poisson distribution with parameter
# lambda (= mu).

# rpois(n, lambda)

# Create a data set of 30 samples from a Poisson distribution with
# lambda = 6.23
set.seed(2) # fix the seed so the same sample is drawn on every run
# Keep the sample in a variable so it can be reused, then print it
poisson_sample <- rpois(n = 30, lambda = 6.23)
poisson_sample

########################################################################
# Exercise:

# The number of accidents that occur at a busy intersection is Poisson
# distributed with a mean of 3.5 per week. Find the probability of the
# following events:
# (a) Less than three accidents in a week
# (b) Five or more accidents in a week
# (c) No accidents today

Omnibus Certification
No ratings yet
Omnibus Certification
1 page
Stastistics and Probability With R Programming Language: Lab Report
50% (2)
Stastistics and Probability With R Programming Language: Lab Report
44 pages
SML Practical 1to11
No ratings yet
SML Practical 1to11
23 pages
RSTUDIO
No ratings yet
RSTUDIO
44 pages
Optilift RPC Manual Rockwell
No ratings yet
Optilift RPC Manual Rockwell
462 pages
Advantage 1 More Practice Burlington Books Compress
No ratings yet
Advantage 1 More Practice Burlington Books Compress
50 pages
Statistics & Data Science Cheat Sheet
No ratings yet
Statistics & Data Science Cheat Sheet
3 pages
Chords Dark Side of Moon
No ratings yet
Chords Dark Side of Moon
14 pages
Lecture 10 R
No ratings yet
Lecture 10 R
117 pages
Ar
No ratings yet
Ar
10 pages
Data Analysis with R: Tables & Plots
No ratings yet
Data Analysis with R: Tables & Plots
13 pages
Essential R Commands Guide
No ratings yet
Essential R Commands Guide
11 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
8 pages
Lab File AD PDF
No ratings yet
Lab File AD PDF
25 pages
List of Programs in R 2 Sem
No ratings yet
List of Programs in R 2 Sem
48 pages
Unit - 2: Data Manipulation With R & Data Visualization in Watson Studio
No ratings yet
Unit - 2: Data Manipulation With R & Data Visualization in Watson Studio
58 pages
R Lab Manual
No ratings yet
R Lab Manual
31 pages
Data Analysis for Analysts
No ratings yet
Data Analysis for Analysts
56 pages
R Functions
No ratings yet
R Functions
8 pages
R Data Visualization Techniques
No ratings yet
R Data Visualization Techniques
46 pages
R Record-1
No ratings yet
R Record-1
57 pages
R For Data Exploration
No ratings yet
R For Data Exploration
52 pages
Spatial Statistics in R
No ratings yet
Spatial Statistics in R
29 pages
R Complete
No ratings yet
R Complete
24 pages
Jumat 10 Feb 2023 Diterima Sarana
No ratings yet
Jumat 10 Feb 2023 Diterima Sarana
1,535 pages
Copy Entire Document Content in R Studio
No ratings yet
Copy Entire Document Content in R Studio
17 pages
Copy Entire Document Content in R Studio: R Script Compiled by Mr. Anup Sharma (Strictly To Be Used As Class Notes)
No ratings yet
Copy Entire Document Content in R Studio: R Script Compiled by Mr. Anup Sharma (Strictly To Be Used As Class Notes)
15 pages
IntroR 2
No ratings yet
IntroR 2
18 pages
R Session - Note2 - Updated
No ratings yet
R Session - Note2 - Updated
7 pages
Analysis Using Statistical: Introduction & Data Exploration
No ratings yet
Analysis Using Statistical: Introduction & Data Exploration
23 pages
R Course
No ratings yet
R Course
7 pages
Commands For Data Analysis Using R
No ratings yet
Commands For Data Analysis Using R
11 pages
R File Code
No ratings yet
R File Code
16 pages
R Cheatsheet ABC
No ratings yet
R Cheatsheet ABC
3 pages
Applied Statistics MAT1011
No ratings yet
Applied Statistics MAT1011
22 pages
R Commands
No ratings yet
R Commands
18 pages
Donnees Mathematique
No ratings yet
Donnees Mathematique
9 pages
STAT 1000 - Worksheet 2
No ratings yet
STAT 1000 - Worksheet 2
14 pages
Cost Practical
No ratings yet
Cost Practical
13 pages
TEB2043 Introduction To Data Science: Descriptive Analytics & Visualization DR Shuhaida Mohamed Shuhidan JAN 2025
No ratings yet
TEB2043 Introduction To Data Science: Descriptive Analytics & Visualization DR Shuhaida Mohamed Shuhidan JAN 2025
29 pages
R Console
No ratings yet
R Console
6 pages
Graphical Analysis
No ratings yet
Graphical Analysis
64 pages
CourseKata R Cheatsheet ABC
No ratings yet
CourseKata R Cheatsheet ABC
5 pages
2 R - Zajecia - 4 - Eng
No ratings yet
2 R - Zajecia - 4 - Eng
7 pages
STAT 1000 - Worksheet 2
No ratings yet
STAT 1000 - Worksheet 2
14 pages
Data Visulization1
No ratings yet
Data Visulization1
39 pages
Overview On DBS
No ratings yet
Overview On DBS
30 pages
R Pgms 30
No ratings yet
R Pgms 30
6 pages
Mock Paper 2
No ratings yet
Mock Paper 2
11 pages
UL2
No ratings yet
UL2
2 pages
STAT 1000 - Worksheet 2
No ratings yet
STAT 1000 - Worksheet 2
14 pages
Fda SSIGNMENT 02
No ratings yet
Fda SSIGNMENT 02
13 pages
R Training AM
No ratings yet
R Training AM
6 pages
R
No ratings yet
R
6 pages
Practical 1 EDA
No ratings yet
Practical 1 EDA
14 pages
Lab Exercise 1
No ratings yet
Lab Exercise 1
16 pages
Ayush Sonar 310104230868 Practical 3 DS R
No ratings yet
Ayush Sonar 310104230868 Practical 3 DS R
10 pages
R Program
No ratings yet
R Program
22 pages
Advanced Stats & Data Science Guide
No ratings yet
Advanced Stats & Data Science Guide
3 pages
Apartment Management Project Report
20% (10)
Apartment Management Project Report
12 pages
BCOS184
No ratings yet
BCOS184
333 pages
ISO 17987-4 - 2013 - Draft
No ratings yet
ISO 17987-4 - 2013 - Draft
37 pages
Gti 2500 Manual
100% (1)
Gti 2500 Manual
105 pages
ISO/IEC JTC1 SC35 Dissemination Event February 2023 - WG6
No ratings yet
ISO/IEC JTC1 SC35 Dissemination Event February 2023 - WG6
17 pages
Samsung Max-Vl65 Vl69 SCH
No ratings yet
Samsung Max-Vl65 Vl69 SCH
12 pages
2013HW70753-EndSemReport-Sagar Agrawal
No ratings yet
2013HW70753-EndSemReport-Sagar Agrawal
56 pages
All Values in The First Column
No ratings yet
All Values in The First Column
7 pages
Ds
No ratings yet
Ds
2 pages
Optimal Design Algorithm Comparison
No ratings yet
Optimal Design Algorithm Comparison
33 pages
0 Intro
No ratings yet
0 Intro
26 pages
06 Synchronization
No ratings yet
06 Synchronization
52 pages
Credit Card Usage Analysis Using KMeans Clustering Report
No ratings yet
Credit Card Usage Analysis Using KMeans Clustering Report
16 pages
V Unit
No ratings yet
V Unit
27 pages
One-Step Image Translation Method
No ratings yet
One-Step Image Translation Method
29 pages
R Guru Cheat Sheet
No ratings yet
R Guru Cheat Sheet
2 pages
Focusrite Windows Driver Updates
No ratings yet
Focusrite Windows Driver Updates
10 pages
2 - Architecture and Organization
No ratings yet
2 - Architecture and Organization
22 pages
SMTS File - 1 RS20200105 2020 05 19 14 - 26 - 04
No ratings yet
SMTS File - 1 RS20200105 2020 05 19 14 - 26 - 04
2 pages
INTERNAL
No ratings yet
INTERNAL
11 pages
Thesis Asset Management Client Login
100% (2)
Thesis Asset Management Client Login
4 pages
Day 7 Task: Understanding Package Manager and Systemctl: Tasks
No ratings yet
Day 7 Task: Understanding Package Manager and Systemctl: Tasks
6 pages
Module 2.9
No ratings yet
Module 2.9
12 pages
Corporate Brochure
No ratings yet
Corporate Brochure
6 pages
Prob Lab
No ratings yet
Prob Lab
10 pages
Inventions Patent Inspired Portable Social Networking Site Vacuum Cleaner Solar Charger Versatile
No ratings yet
Inventions Patent Inspired Portable Social Networking Site Vacuum Cleaner Solar Charger Versatile
2 pages
Venkata Rami Reddy Resume
No ratings yet
Venkata Rami Reddy Resume
1 page
(Practical) Programming With R
No ratings yet
(Practical) Programming With R
5 pages

R Code

Uploaded by

R Code

Uploaded by

########################################################################

# Importing data csv files

# Insert the address of "Data.csv" file

#data <- read.csv("C:/Users/DIM/Downloads/Data.csv")

read.csv("F:/Msc(Kealniya)/1st Semi/Statistics_for_Data_Science _MDAN

####### Data exploration

# Plot the histogram of math_score

# Frequency polygon of the math_score

ggplot(data, aes(math_score)) + geom_freqpoly(bins=10)

plot(density(data$math_score), main="Density of Math Score",

# Scatterplot of math_score vs writing scores

# Extract group data

# Constructing a data frame

group_label =c("A", "B", "C", "D", "E") # Group names

group_count = data.frame(group_label, count)

# Extracting gender and race data

grp = rep(c("A", "B", "C", "D", "E"),2)

df <- data.frame(grp, gen, fre)

# barplot with multiple groups

# Sorting data based on the ascending order of math_score

# visualizing multivariate data:

# "women" dataset is available in R: https://stat.ethz.ch/R-manual/R-

##Group Activity: Perform a descriptive analysis for your dataset

#Group/Room Dataset Data Description

# Check the datasets available in R: https://stat.ethz.ch/R-manual/R-

# Extract data of the male students:

# To get a random sample from the data whole data set

# Create a univariate dataset

cv <- sd(data) / mean(data) * 100

# Inter Quartile Range

quantile(data, c(.25, .75))

# Skewness & kurtosis

## Data set with missing value

# Find mean with missing data

# Find mean dropping missing data (NA entries)

DATA <- iris

# Content: Density, distribution function, and random generation

# Density function: dpois(x, lambda, log = FALSE)

rm(list=ls()) # Clear the Environment

#(a) Find the probability of having

#(b) Plot the probability distribution of No of cars crossing the bridge.

# X = The number of cars crossing the bridge in a particular minute

# Poisson distribution with parameter lambda(=mu).

#(i) no cars: X=0

sprintf("P(X = 0) = %s", round(P_X_0, digits = 6))

#(iii) less than 17 cars

# (b) The Poisson probability distribution plot

par(mfrow = c(2, 1))

# Probability density function (pdf)

# Cumulative density function (cdf)

#Create a data set of 30 samples from a Poisson distribution with lambda

# The number of accidents that occur at a busy intersection is Poisson

You might also like