0% found this document useful (0 votes)

8 views10 pages

Decision Tree

The document outlines the implementation of a decision tree algorithm using R to classify drug types based on various features from a dataset. It includes data loading, preprocessing steps such as outlier detection and replacement, and visualization of data distributions. The model is trained and evaluated, achieving an accuracy of approximately 96.49% on the test set.

Uploaded by

akanaguhari

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

8 views10 pages

Decision Tree

Uploaded by

akanaguhari

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 10

DECISION TREE ALGORITHM

24CSEG034

2025-04-06
# Load libraries
library(caret)

## Loading required package: ggplot2

## Loading required package: lattice

library(lattice)
library(rpart)
library(rpart.plot)
library(corrplot)

## corrplot 0.95 loaded

library(RColorBrewer)

library(readr)
# Location of the drug200 dataset. Set the DRUG200_CSV environment variable to
# read a copy stored elsewhere; when it is unset the original path is used.
drug_csv_path <- Sys.getenv(
  "DRUG200_CSV",
  unset = "C:/Users/prane/Downloads/drug200.csv"
)
df <- read_csv(drug_csv_path)

## Rows: 200 Columns: 6

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Sex, BP, Cholesterol, Drug
## dbl (2): Age, Na_to_K
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Summary
# Print a per-column overview of the loaded data frame.
summary(df)

##       Age            Sex                 BP            Cholesterol
##  Min.   :15.00   Length:200         Length:200         Length:200
##  1st Qu.:31.00   Class :character   Class :character   Class :character
##  Median :45.00   Mode  :character   Mode  :character   Mode  :character
##  Mean   :44.31
##  3rd Qu.:58.00
##  Max.   :74.00

## Na_to_K Drug
## Min. : 6.269 Length:200
## 1st Qu.:10.445 Class :character
## Median :13.937 Mode :character
## Mean :16.084
## 3rd Qu.:19.380
## Max. :38.247

# Count the NA entries in every column and show the per-column totals
missing_values <- local({
  na_mask <- is.na(df)
  colSums(na_mask)
})
print(missing_values)

##         Age         Sex          BP Cholesterol     Na_to_K        Drug
##           0           0           0           0           0           0

# Detect outliers
# boxplot.stats() returns the points lying beyond the boxplot whiskers in the
# `out` element of its result.
boxplot.stats(df$Age)$out

## numeric(0)

# The element name is written in full: `$o` only worked through partial
# matching and would break if another element starting with "o" were added.
boxplot.stats(df$Na_to_K)$out

## [1] 33.486 38.247 35.639 33.542 32.922 37.188 34.997 34.686

# Boxplot for all numeric variables
# vapply() always yields a logical mask, whereas the return type of sapply()
# depends on its input.
numeric_df <- df[, vapply(df, is.numeric, logical(1))]
# stack() reshapes to long form: `values` holds the data, `ind` the source
# column name, which is what the bwplot formula below refers to.
long_df <- stack(numeric_df)
bwplot(
  values ~ ind,
  data = long_df,
  col = "darkgreen",
  fill = "lightyellow",
  main = "Boxplots for Outlier Detection",
  xlab = "Features",
  ylab = "Values"
)
# Replace outliers with median

# Return `x` with every value outside [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]
# overwritten by the median of `x` (median taken before any replacement).
# Missing values are ignored when computing the statistics and are left as NA.
replace_outliers_with_median <- function(x) {
  quartiles <- quantile(x, c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  # Named `spread` so the stats::IQR() function is not shadowed.
  spread <- quartiles[2] - quartiles[1]
  lower <- quartiles[1] - 1.5 * spread
  upper <- quartiles[2] + 1.5 * spread
  # which() drops NA comparisons, so NA entries are never selected.
  x[which(x < lower | x > upper)] <- median(x, na.rm = TRUE)
  x
}

# Apply the replacement to each numeric column of df in place.
numeric_cols <- names(df)[vapply(df, is.numeric, logical(1))]
for (col_name in numeric_cols) {
  df[[col_name]] <- replace_outliers_with_median(df[[col_name]])
}

# Keep the outlier-adjusted data under the name the later steps use
df_cleaned <- df

# List the column names (df_cleaned is an exact copy of df at this point)
print(colnames(df_cleaned))

## [1] "Age"         "Sex"         "BP"          "Cholesterol" "Na_to_K"
## [6] "Drug"

# Plots
# Age histogram, one panel per drug. The title is a single-line literal so
# that no stray line break ends up in the rendered heading.
histogram(
  ~ Age | Drug,
  data = df_cleaned,
  layout = c(3, 2),
  col = "darkorange",
  border = "black",
  main = "Age Distribution by Drug",
  breaks = 20
)

# Na_to_K distribution for each drug (horizontal boxes: Drug on the y axis).
bwplot(
  Drug ~ Na_to_K,
  data = df_cleaned,
  col = "purple",
  fill = "lavender",
  main = "Sodium-Potassium Ratio by Drug",
  xlab = "NaToK",
  ylab = "Drug"
)

# Age against Na_to_K, one scatter panel per drug.
xyplot(
  Age ~ Na_to_K | Drug,
  data = df_cleaned,
  col = "firebrick",
  pch = 16,
  cex = 1.2,
  main = "Age vs NaToK by Drug",
  xlab = "NaToK",
  ylab = "Age"
)

# Age distribution split by sex.
bwplot(
  Age ~ Sex,
  data = df_cleaned,
  col = c("lightblue", "deeppink"),
  main = "Age Distribution by Sex",
  xlab = "Sex",
  ylab = "Age"
)

# Na_to_K distribution split by sex.
bwplot(
  Na_to_K ~ Sex,
  data = df_cleaned,
  col = c("lightblue", "deeppink"),
  main = "Sodium-Potassium Ratio by Sex",
  xlab = "Sex",
  ylab = "NaToK"
)
# Correlation plot
# Pick the numeric columns with a type-stable vapply() mask, compute their
# pairwise correlations, and draw them as a coloured matrix with coefficients.
num_vars <- df_cleaned[, vapply(df_cleaned, is.numeric, logical(1))]
cor_matrix <- cor(num_vars)
corrplot(
  cor_matrix,
  method = "color",
  col = brewer.pal(8, "PiYG"),
  tl.col = "black",
  tl.srt = 45,
  addCoef.col = "black",
  number.cex = 0.7,
  title = "Correlation Matrix",
  mar = c(0, 0, 2, 0)
)
# Split data
# Fix the RNG seed so the 70/30 partition on Drug is reproducible.
set.seed(123)
trainIndex <- createDataPartition(
  y = df_cleaned$Drug,
  p = 0.7,
  list = FALSE
)
# Rows not selected for training form the test set.
testData <- df_cleaned[-trainIndex, ]
trainData <- df_cleaned[trainIndex, ]

# Train decision tree
# Encode the response as a factor BEFORE fitting so that the class levels the
# model is trained on are the same ones used for evaluation. The test response
# reuses the training levels.
trainData$Drug <- factor(trainData$Drug)
testData$Drug <- factor(testData$Drug, levels = levels(trainData$Drug))

drug_model <- rpart(
  Drug ~ Age + Sex + BP + Cholesterol + Na_to_K,
  data = trainData,
  method = "class"
)
rpart.plot(
  drug_model,
  main = "Decision Tree for Drug Classification",
  box.col = "lightsteelblue",
  shadow.col = "gray"
)

# Prediction
# type = "class" asks for the predicted class label of each test row.
predictions <- predict(drug_model, testData, type = "class")
# Give the predictions exactly the reference's level set for the comparison.
predictions <- factor(predictions, levels = levels(testData$Drug))

# Accuracy
# Cross-tabulate predicted against actual drug classes and show the result.
conf_matrix <- confusionMatrix(
  data = predictions,
  reference = testData$Drug
)
conf_matrix

## Confusion Matrix and Statistics

##
## Reference
## Prediction drugA drugB drugC drugX drugY
## drugA 6 0 0 0 1
## drugB 0 4 0 0 0
## drugC 0 0 4 0 0
## drugX 0 0 0 16 1
## drugY 0 0 0 0 25
##
## Overall Statistics
##
## Accuracy : 0.9649
## 95% CI : (0.8789, 0.9957)
## No Information Rate : 0.4737
## P-Value [Acc > NIR] : 6.477e-16
##
## Kappa : 0.9488
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
##                      Class: drugA Class: drugB Class: drugC Class: drugX
## Sensitivity                1.0000      1.00000      1.00000       1.0000
## Specificity                0.9804      1.00000      1.00000       0.9756
## Pos Pred Value             0.8571      1.00000      1.00000       0.9412
## Neg Pred Value             1.0000      1.00000      1.00000       1.0000
## Prevalence                 0.1053      0.07018      0.07018       0.2807
## Detection Rate             0.1053      0.07018      0.07018       0.2807
## Detection Prevalence       0.1228      0.07018      0.07018       0.2982
## Balanced Accuracy          0.9902      1.00000      1.00000       0.9878
## Class: drugY
## Sensitivity 0.9259
## Specificity 1.0000
## Pos Pred Value 1.0000
## Neg Pred Value 0.9375
## Prevalence 0.4737
## Detection Rate 0.4386
## Detection Prevalence 0.4386
## Balanced Accuracy 0.9630

Classification
No ratings yet
Classification
4 pages
Practical Machine Learning
No ratings yet
Practical Machine Learning
11 pages
Ebay Auction Case Solution
No ratings yet
Ebay Auction Case Solution
9 pages
期末作業
No ratings yet
期末作業
10 pages
Ex 10 - Decision Tree With Rpart and Fancy Plot and Cardio Data
No ratings yet
Ex 10 - Decision Tree With Rpart and Fancy Plot and Cardio Data
4 pages
DS Report 03
No ratings yet
DS Report 03
30 pages
Stroke Prediction Dataset
No ratings yet
Stroke Prediction Dataset
48 pages
TP3.ipynb - Colab
No ratings yet
TP3.ipynb - Colab
17 pages
Assignment# 06
No ratings yet
Assignment# 06
16 pages
kNN with R Caret Package Guide
No ratings yet
kNN with R Caret Package Guide
17 pages
Muhamad Choza Inul Muna - Analisis Sentimen
No ratings yet
Muhamad Choza Inul Muna - Analisis Sentimen
8 pages
Shark Tank Deal Prediction - Uudhya - Dec 2019
No ratings yet
Shark Tank Deal Prediction - Uudhya - Dec 2019
16 pages
Final Data Lab
No ratings yet
Final Data Lab
21 pages
7708 - MBA PredAnanBigDataNov21
No ratings yet
7708 - MBA PredAnanBigDataNov21
11 pages
Sahanashree Ex-2 ML
No ratings yet
Sahanashree Ex-2 ML
9 pages
Heart Disease Prediction Model
No ratings yet
Heart Disease Prediction Model
35 pages
20BCE1205 Lab6
No ratings yet
20BCE1205 Lab6
12 pages
Diabetes Dectection
No ratings yet
Diabetes Dectection
7 pages
Tugas 3
No ratings yet
Tugas 3
20 pages
Exp 5
No ratings yet
Exp 5
7 pages
HUST PPT Template 2022 RED 16x9 567042-2
No ratings yet
HUST PPT Template 2022 RED 16x9 567042-2
25 pages
R Package Setup for Data Analysis
No ratings yet
R Package Setup for Data Analysis
21 pages
Healthcare Analytics
No ratings yet
Healthcare Analytics
72 pages
Diabetes Prediction Using Machine Learning
No ratings yet
Diabetes Prediction Using Machine Learning
20 pages
ML Proj Diabetes
No ratings yet
ML Proj Diabetes
51 pages
Data Perparation Penting
No ratings yet
Data Perparation Penting
12 pages
Case Study 3 Explanation
No ratings yet
Case Study 3 Explanation
4 pages
Pima Indians Diabetes Database Analysis - Kaggle
No ratings yet
Pima Indians Diabetes Database Analysis - Kaggle
37 pages
Task 1 RR Usa
No ratings yet
Task 1 RR Usa
5 pages
Heart Failure Prediction
100% (1)
Heart Failure Prediction
41 pages
EPA HQ OPP 2017 0180 0016 - Attachment - 70
No ratings yet
EPA HQ OPP 2017 0180 0016 - Attachment - 70
4 pages
Reliability Report Example
No ratings yet
Reliability Report Example
4 pages
Mla - 2 (Cia - 2) - 20221013
No ratings yet
Mla - 2 (Cia - 2) - 20221013
14 pages
Explanationdocx
No ratings yet
Explanationdocx
9 pages
Data Pre-Processing
No ratings yet
Data Pre-Processing
22 pages
Heart Disease Prediction Model
No ratings yet
Heart Disease Prediction Model
19 pages
Heart Failure Prediction With Detailed Headings
No ratings yet
Heart Failure Prediction With Detailed Headings
12 pages
Pima Tutorial
No ratings yet
Pima Tutorial
8 pages
Mla - 2 (Cia - 1) - 20221013
No ratings yet
Mla - 2 (Cia - 1) - 20221013
14 pages
ML0101EN Clas Decision Trees Drug Py v1
No ratings yet
ML0101EN Clas Decision Trees Drug Py v1
12 pages
Code
No ratings yet
Code
25 pages
Non-Compartmental PK Analysis
No ratings yet
Non-Compartmental PK Analysis
43 pages
q3 Stat2100 Bautista-Lhuriely
No ratings yet
q3 Stat2100 Bautista-Lhuriely
11 pages
AMDA Practical - A048
No ratings yet
AMDA Practical - A048
35 pages
02 Pca
No ratings yet
02 Pca
14 pages
Output
No ratings yet
Output
2 pages
Medidas de Tendencia Central 2020 PDF
No ratings yet
Medidas de Tendencia Central 2020 PDF
26 pages
Da 06-10
No ratings yet
Da 06-10
14 pages
QT Report
No ratings yet
QT Report
20 pages
Final
No ratings yet
Final
13 pages
BAN5
No ratings yet
BAN5
2 pages
Major Project - Colab
No ratings yet
Major Project - Colab
15 pages
Rubel Assignment 3
No ratings yet
Rubel Assignment 3
5 pages
Netram
No ratings yet
Netram
11 pages
HW 9
No ratings yet
HW 9
10 pages
Heart Disease Report With Comments and Code
No ratings yet
Heart Disease Report With Comments and Code
9 pages
Heart Disease Indicator Prediction Model
No ratings yet
Heart Disease Indicator Prediction Model
17 pages
My Bets
No ratings yet
My Bets
1 page
UsterStatistics - 2025 07 18 - 13 36 10
No ratings yet
UsterStatistics - 2025 07 18 - 13 36 10
36 pages
Barry Meadow - Money Management Part 1
67% (3)
Barry Meadow - Money Management Part 1
4 pages
Answers Apate264 EXAM2
No ratings yet
Answers Apate264 EXAM2
8 pages
K.L.E. Institute of Technology
No ratings yet
K.L.E. Institute of Technology
2 pages
WWW - Stat.tamu - Edu Lzhou Stat302 Standardnormaltable
No ratings yet
WWW - Stat.tamu - Edu Lzhou Stat302 Standardnormaltable
2 pages
Handicap Chase Betting System
No ratings yet
Handicap Chase Betting System
32 pages
DAMATH Board
No ratings yet
DAMATH Board
4 pages
OMS. Perimetro Cefalico Meninos - 0-5 Anos. em Z Score.
No ratings yet
OMS. Perimetro Cefalico Meninos - 0-5 Anos. em Z Score.
1 page
Vanajenahalli Layout Plan-24!07!2023 - Odd
No ratings yet
Vanajenahalli Layout Plan-24!07!2023 - Odd
1 page
Maths 3
No ratings yet
Maths 3
57 pages
Tabel Distribusi Normal Baku
No ratings yet
Tabel Distribusi Normal Baku
25 pages
Fast Results - Greyhound Racing - Sporting Life 12
No ratings yet
Fast Results - Greyhound Racing - Sporting Life 12
1 page
Odd Comparison Note
No ratings yet
Odd Comparison Note
11 pages
Bets Tracker
No ratings yet
Bets Tracker
2 pages
Over 2.5 Goal Results
No ratings yet
Over 2.5 Goal Results
2 pages
Buderer 1996
No ratings yet
Buderer 1996
6 pages
Decimals & Fractions - AFCAT MCQs (50 Questions) by Vivek Sir @afawithlove
No ratings yet
Decimals & Fractions - AFCAT MCQs (50 Questions) by Vivek Sir @afawithlove
11 pages
Simplified Field Tables: Weight-For-Length BOYS Birth To 2 Years (Z-Scores)
No ratings yet
Simplified Field Tables: Weight-For-Length BOYS Birth To 2 Years (Z-Scores)
5 pages
MENANGBET88 Daftar Situs Bandar Judi Bola Online Sbobet Terbesar
No ratings yet
MENANGBET88 Daftar Situs Bandar Judi Bola Online Sbobet Terbesar
1 page
Z Score Table
No ratings yet
Z Score Table
2 pages
Football Betting Secrets PDF
100% (1)
Football Betting Secrets PDF
35 pages
Pusat Pertumbuhan Kota Dan Hinterland
No ratings yet
Pusat Pertumbuhan Kota Dan Hinterland
9 pages
MST 567 Quiz
No ratings yet
MST 567 Quiz
2 pages
Matched Betting Maths
No ratings yet
Matched Betting Maths
9 pages
Maths 4thgrade Finals-1
No ratings yet
Maths 4thgrade Finals-1
81 pages
Sensi Dan Spesi Metode Apung
No ratings yet
Sensi Dan Spesi Metode Apung
2 pages
Z Score Girls BMI
100% (2)
Z Score Girls BMI
6 pages
Abertura vs % Passing Analysis
No ratings yet
Abertura vs % Passing Analysis
10 pages
Medical Test Accuracy Explained
No ratings yet
Medical Test Accuracy Explained
12 pages

Decision Tree

Uploaded by

Decision Tree

Uploaded by

DECISION TREE ALGORITHM

## Loading required package: ggplot2

## Loading required package: lattice

## corrplot 0.95 loaded

## Rows: 200 Columns: 6

## Age Sex BP Cholesterol

## Min. :15.00 Length:200 Length:200 Length:200

## 1st Qu.:31.00 Class :character Class :character

# Check for missing values

## Age Sex BP Cholesterol Na_to_K

## [1] 33.486 38.247 35.639 33.542 32.922 37.188 34.997 34.686

# Boxplot for all numeric variables

## [1] "Age" "Sex" "BP" "Cholesterol"

bwplot(Na_to_K ~ Sex, data = df_cleaned, col = c("lightblue",

# Train decision tree

## Confusion Matrix and Statistics

You might also like