Recognizing a road sign with kNN
# Load the 'class' package
library(class)
# Create a vector of labels
sign_types <- signs$sign_type
# Classify the next sign observed
knn(train = signs[-1], test = next_sign, cl = sign_types)
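Because kNN votes by distance, predictors should share a common scale. The sign data's color intensities are already all on 0-255, so the exercise skips rescaling, but a min-max normalization sketch would look like this (the normalize helper is illustrative, not part of the course code):
normalize <- function(x) (x - min(x)) / (max(x) - min(x))
signs_norm <- as.data.frame(lapply(signs[-1], normalize))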
Exploring the traffic sign dataset
# Examine the structure of the signs dataset
str(signs)
# Count the number of signs of each type
table(signs$sign_type)
# Check r10's average red level by sign type
aggregate(r10 ~ sign_type, data = signs, mean)
Classifying a collection of road signs
# Use kNN to identify the test road signs
sign_types <- signs$sign_type
signs_pred <- knn(train = signs[-1], test = test_signs[-1], cl = sign_types)
# Create a confusion matrix of the predicted versus actual values
signs_actual <- test_signs$sign_type
table(signs_pred, signs_actual)
# Compute the accuracy
mean(signs_pred == signs_actual)
Testing other 'k' values
# Compute the accuracy of the baseline model (default k = 1)
k_1 <- knn(train = signs[-1], test = signs_test[-1], cl = sign_types)
mean(signs_actual == k_1)
# Modify the above to set k = 7
k_7 <- knn(train = signs[-1], test = signs_test[-1], cl = sign_types, k = 7)
mean(signs_actual == k_7)
# Set k = 15 and compare to the above
k_15 <- knn(train = signs[-1], test = signs_test[-1], cl = sign_types, k = 15)
mean(signs_actual == k_15)
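Rather than fitting each k by hand, a small loop can sweep several candidates in one pass; this sketch assumes the same signs, signs_test, sign_types, and signs_actual objects used above:
for (k in c(1, 5, 7, 11, 15)) {
  pred <- knn(train = signs[-1], test = signs_test[-1], cl = sign_types, k = k)
  cat("k =", k, "accuracy =", mean(signs_actual == pred), "\n")
}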
Seeing how the neighbors voted
# Use the prob parameter to get the proportion of votes for the winning class
sign_pred <- knn(train = signs[-1], test = signs_test[-1],
                 cl = sign_types, k = 7, prob = TRUE)
# Get the "prob" attribute from the predicted classes
sign_prob <- attr(sign_pred, "prob")
# Examine the first several predictions
head(sign_pred)
# Examine the proportion of votes for the winning class
head(sign_prob)
Computing probabilities
# Compute P(A)
p_A <- nrow(subset(where9am, location == "office")) / nrow(where9am)
# Compute P(B)
p_B <- nrow(subset(where9am, daytype == "weekday")) / nrow(where9am)
# Compute the observed P(A and B)
p_AB <- nrow(subset(where9am, location == "office" & daytype == "weekday")) /
  nrow(where9am)
# Compute P(A | B) and print its value
p_A_given_B <- p_AB / p_B
p_A_given_B
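As a sanity check, if location and daytype were independent, P(A and B) would equal P(A) * P(B); comparing the two shows how much knowing it is a weekday actually changes the picture:
# Under independence these two values would match; a gap means daytype is informative
p_A * p_B
p_AB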
A simple Naive Bayes location model
# Load the naivebayes package
library(naivebayes)
# Build the location prediction model
locmodel <- naive_bayes(location ~ daytype, data = where9am)
# Predict Thursday's 9am location
predict(locmodel, thursday9am)
# Predict Saturday's 9am location
predict(locmodel, saturday9am)
Examining "raw" probabilities
# The 'naivebayes' package is loaded into the workspace
# and the Naive Bayes 'locmodel' has been built
# Examine the location prediction model
locmodel
# Obtain the predicted probabilities for Thursday at 9am
predict(locmodel, thursday9am, type = "prob")
# Obtain the predicted probabilities for Saturday at 9am
predict(locmodel, saturday9am, type = "prob")
A more sophisticated location model
# The 'naivebayes' package is loaded into the workspace already
# Build a NB model of location
locmodel <- naive_bayes(location ~ daytype + hourtype, data = locations)
# Predict Brett's location on a weekday afternoon
predict(locmodel, weekday_afternoon)
# Predict Brett's location on a weekday evening
predict(locmodel, weekday_evening)
Preparing for unforeseen circumstances
# The 'naivebayes' package is loaded into the workspace already
# The Naive Bayes location model (locmodel) has already been built
# Observe the predicted probabilities for a weekend afternoon
predict(locmodel, weekend_afternoon, type = "prob")
# Build a new model using the Laplace correction
locmodel2 <- naive_bayes(location ~ daytype + hourtype,
                         data = locations, laplace = 1)
# Observe the new predicted probabilities for a weekend afternoon
predict(locmodel2, weekend_afternoon, type = "prob")
Building simple logistic regression models
# Examine the dataset to identify potential independent variables
str(donors)
# Explore the dependent variable
table(donors$donated)
# Build the donation model
donation_model <- glm(donated ~ bad_address + interest_religion + interest_veterans,
                      data = donors, family = "binomial")
# Summarize the model results
summary(donation_model)
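glm() reports coefficients on the log-odds scale; exponentiating them gives odds ratios, which are often easier to read (values above 1 raise the odds of donating):
# Convert log-odds coefficients to odds ratios
exp(coef(donation_model))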
Making a binary prediction
# Estimate the donation probability
donors$donation_prob <- predict(donation_model, type = "response")
# Find the donation probability of the average prospect
mean(donors$donated)
# Predict a donation if probability of donation is greater than average
donors$donation_pred <- ifelse(donors$donation_prob > 0.0504, 1, 0)
# Calculate the model's accuracy
mean(donors$donated == donors$donation_pred)
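Accuracy alone is misleading here because donors are rare: a do-nothing baseline that predicts 0 for everyone scores nearly as well, which is why the ROC analysis below matters:
# Baseline accuracy of always predicting "no donation"
mean(donors$donated == 0)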
Calculating ROC Curves and AUC
# Load the pROC package
library(pROC)
# Create a ROC curve
ROC <- roc(donors$donated, donors$donation_prob)
# Plot the ROC curve
plot(ROC, col = "blue")
# Calculate the area under the curve (AUC)
auc(ROC)
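pROC can also suggest a classification threshold from the curve; coords() with "best" returns the point balancing sensitivity and specificity:
# Find the threshold at the "best" point on the ROC curve
coords(ROC, "best", ret = c("threshold", "sensitivity", "specificity"))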
Coding categorical features
# Convert the wealth rating to a factor
donors$wealth_levels <- factor(donors$wealth_rating, levels = c(0, 1, 2, 3),
                               labels = c("Unknown", "Low", "Medium", "High"))
# Use relevel() to change reference category
donors$wealth_levels <- relevel(donors$wealth_levels, ref = "Medium")
# See how our factor coding impacts the model
summary(glm(donated ~ wealth_levels, data = donors, family = "binomial"))
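To see exactly how the factor is dummy-coded (Medium is now the omitted reference level), inspect the model matrix:
# Each non-reference level gets its own 0/1 indicator column
head(model.matrix(~ wealth_levels, data = donors))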
Handling missing data
# Find the average age among non-missing values
summary(donors$age)
# Impute missing age values with the mean age
donors$imputed_age <- ifelse(is.na(donors$age),
                             round(mean(donors$age, na.rm = TRUE), 2),
                             donors$age)
# Create missing value indicator for age
donors$missing_age <- ifelse(is.na(donors$age), 1, 0)
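Both new columns can then enter the model together, so "age unknown" gets its own coefficient instead of biasing the age effect; a minimal sketch:
# Model donation using the imputed age plus the missingness indicator
summary(glm(donated ~ imputed_age + missing_age, data = donors, family = "binomial"))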
Building a more sophisticated model
# Build a recency, frequency, and money (RFM) model
rfm_model <- glm(donated ~ recency * frequency + money,
                 data = donors, family = "binomial")
# Summarize the RFM model to see how the parameters were coded
summary(rfm_model)
# Compute predicted probabilities for the RFM model
rfm_prob <- predict(rfm_model, type = "response")
# Plot the ROC curve for the new model
library(pROC)
ROC <- roc(donors$donated, rfm_prob)
plot(ROC, col = "red")
auc(ROC)
Building a stepwise regression model
# Specify a null model with no predictors
null_model <- glm(donated ~ 1, data = donors, family = "binomial")
# Specify the full model using all of the potential predictors
full_model <- glm(donated ~ ., data = donors, family = "binomial")
# Use a forward stepwise algorithm to build a parsimonious model
step_model <- step(null_model,
                   scope = list(lower = null_model, upper = full_model),
                   direction = "forward")
# Estimate the stepwise donation probability
step_prob <- predict(step_model, type = "response")
# Plot the ROC of the stepwise model
library(pROC)
ROC <- roc(donors$donated, step_prob)
plot(ROC, col = "red")
auc(ROC)
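step() supports the reverse strategy as well; a sketch of backward elimination starting from the full model (same objects as above, and potentially slow when there are many predictors):
# Drop predictors one at a time until AIC stops improving
backward_model <- step(full_model, direction = "backward")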
Building a simple decision tree
# Load the rpart package
library(rpart)
# Build a lending model predicting loan outcome from loan amount and credit score
loan_model <- rpart(outcome ~ loan_amount + credit_score, data = loans,
                    method = "class", control = rpart.control(cp = 0))
# Make a prediction for someone with good credit
predict(loan_model, good_credit, type = "class")
# Make a prediction for someone with bad credit
predict(loan_model, bad_credit, type = "class")
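rpart can also return class probabilities rather than hard labels:
# Probability of each outcome for the good-credit applicant
predict(loan_model, good_credit, type = "prob")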
Visualizing classification trees
# Examine the loan_model object
loan_model
# Load the rpart.plot package
library(rpart.plot)
# Plot the loan_model with default settings
rpart.plot(loan_model)
# Plot the loan_model with customized settings
rpart.plot(loan_model, type = 3, box.palette = c("red", "green"),
           fallen.leaves = TRUE)
Creating random test datasets
# Determine the number of rows for training
nrow(loans) * 0.75
# Create a random sample of row IDs
sample_rows <- sample(nrow(loans), nrow(loans) * 0.75)
# Create the training dataset
loans_train <- loans[sample_rows, ]
# Create the test dataset
loans_test <- loans[-sample_rows, ]
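sample() is random, so for a reproducible split, fix the RNG seed before drawing the row IDs (the seed value here is arbitrary):
set.seed(42)
sample_rows <- sample(nrow(loans), nrow(loans) * 0.75)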
Building and evaluating a larger tree
# Grow a tree using all of the available applicant data
loan_model <- rpart(outcome ~ ., data = loans_train, method = "class",
                    control = rpart.control(cp = 0))
# Make predictions on the test dataset
loans_test$pred <- predict(loan_model, loans_test, type = "class")
# Examine the confusion matrix
table(loans_test$pred, loans_test$outcome)
# Compute the accuracy on the test dataset
mean(loans_test$pred == loans_test$outcome)
Preventing overgrown trees
# Grow a tree with maxdepth of 6
loan_model <- rpart(outcome ~ ., data = loans_train, method = "class",
                    control = rpart.control(cp = 0, maxdepth = 6))
# Make a class prediction on the test set
loans_test$pred <- predict(loan_model, loans_test, type = "class")
# Compute the accuracy of the simpler tree
mean(loans_test$pred == loans_test$outcome)
# Swap maxdepth for a minimum split of 500
loan_model <- rpart(outcome ~ ., data = loans_train, method = "class",
                    control = rpart.control(cp = 0, minsplit = 500))
# Run this. How does the accuracy change?
loans_test$pred <- predict(loan_model, loans_test, type = "class")
mean(loans_test$pred == loans_test$outcome)
Creating a nicely pruned tree
# Grow an overly complex tree
loan_model <- rpart(outcome ~ ., data = loans_train, method = "class",
                    control = rpart.control(cp = 0))
# Examine the complexity plot
plotcp(loan_model)
# Prune the tree
loan_model_pruned <- prune(loan_model, cp = 0.0014)
# Compute the accuracy of the pruned tree
loans_test$pred <- predict(loan_model_pruned, loans_test, type = "class")
mean(loans_test$pred == loans_test$outcome)
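Instead of reading cp off the plot, the tree's cptable can supply it automatically; this sketch picks the value with the lowest cross-validated error:
# Choose the cp that minimizes the cross-validated error (xerror)
best_cp <- loan_model$cptable[which.min(loan_model$cptable[, "xerror"]), "CP"]
loan_model_pruned <- prune(loan_model, cp = best_cp)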
Building a random forest model
# Load the randomForest package
library(randomForest)
# Build a random forest model
loan_model <- randomForest(outcome ~ ., data = loans_train)
# Compute the accuracy of the random forest
loans_test$pred <- predict(loan_model, loans_test)
mean(loans_test$pred == loans_test$outcome)
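randomForest also tracks variable importance; a quick look shows which applicant attributes the ensemble relied on most:
# Plot mean decrease in Gini impurity per predictor
varImpPlot(loan_model)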