import numpy as np
import pandas as pd
def pre_processing(df):
    """Split a DataFrame into feature columns and the target column.

    The last column of ``df`` is treated as the target; every other column
    is treated as a feature.

    Args:
        df: DataFrame whose final column holds the class label.

    Returns:
        A ``(X, y)`` tuple, where ``X`` is ``df`` without its last column
        and ``y`` is that last column selected by its label.
    """
    target_column = df.columns[-1]
    features = df.drop(columns=[target_column])
    labels = df[target_column]
    return features, labels
def calculate_prior_probabilities(y):
    """Estimate the prior probability of each class label.

    Missing labels (NaN/None) are ignored both when counting and when
    normalising, so the returned probabilities always sum to 1 for a
    non-empty set of observed labels.

    Args:
        y: pandas Series of class labels.

    Returns:
        Dict mapping each observed class label to its relative frequency,
        ordered from most to least frequent. Empty if ``y`` has no
        non-missing labels.
    """
    # ``value_counts`` drops missing labels; normalising there (rather than
    # dividing by ``len(y)``) keeps the divisor consistent with the counts.
    return y.value_counts(normalize=True).to_dict()
def calculate_class_counts(X, y):
    """Count how often each feature value occurs within each class.

    Args:
        X: DataFrame of categorical feature columns.
        y: Series of class labels; expected to share its index with ``X``.

    Returns:
        Nested dict ``counts[feature][cls][value]`` giving the number of
        rows of class ``cls`` whose ``feature`` equals ``value``. Every
        value that occurs anywhere in the feature column gets an entry for
        every class, with 0 where the combination was never observed.
    """
    # One boolean row mask per class, built once and reused for all features.
    class_masks = [(cls, y == cls) for cls in y.unique()]

    class_counts = {}
    for feature in X.columns:
        column = X[feature]
        values = column.unique()
        class_counts[feature] = {}
        for cls, mask in class_masks:
            # A single vectorised tally per (feature, class) replaces a
            # full pass over the data for every individual value.
            observed = column[mask].value_counts().to_dict()
            class_counts[feature][cls] = {
                value: observed.get(value, 0) for value in values
            }
    return class_counts
def calculate_likelihoods(X, y):
    """Build the Laplace-smoothed likelihood table P(value | class).

    For each feature, class and distinct feature value the estimate is
    ``(count + 1) / (class_total + n_values)``, where ``n_values`` is the
    number of distinct values of that feature.

    Args:
        X: DataFrame of categorical feature columns.
        y: Series of class labels.

    Returns:
        Nested dict ``table[feature][cls][value]`` holding the smoothed
        conditional probability of ``value`` given ``cls``.
    """
    class_counts = calculate_class_counts(X, y)
    classes = y.unique()
    class_totals = {cls: int((y == cls).sum()) for cls in classes}

    likelihood_table = {}
    for feature in X.columns:
        # The distinct values and their number do not depend on the class
        # or on the value being scored, so compute them once per feature.
        values = X[feature].unique()
        n_values = len(values)
        feature_counts = class_counts[feature]

        likelihood_table[feature] = {}
        for cls in classes:
            denominator = class_totals[cls] + n_values
            counts_for_class = feature_counts[cls]
            likelihood_table[feature][cls] = {
                value: (counts_for_class.get(value, 0) + 1) / denominator
                for value in values
            }
    return likelihood_table
def _log_probability(probability):
    """Return ``log(probability)``, mapping non-positive values to ``-inf``."""
    return np.log(probability) if probability > 0 else -np.inf


def predict_naive_bayes(X_new, priors, likelihood_table, epsilon=1e-9):
    """Predict the most probable class for every row of ``X_new``.

    Classes are scored in log space (log prior plus the sum of log
    likelihoods). This ranks classes the same way as the product of
    probabilities but does not underflow to zero when there are many
    features.

    Args:
        X_new: DataFrame with one sample per row.
        priors: Dict mapping class label to prior probability.
        likelihood_table: Nested dict ``table[feature][cls][value]`` of
            conditional probabilities.
        epsilon: Likelihood used for a feature value that has no entry in
            the table for the class being scored.

    Returns:
        List with one predicted class label per row, in row order. On a
        tie the class that appears first in ``priors`` is returned.
    """
    predictions = []
    for _, sample in X_new.iterrows():
        log_posteriors = {}
        for cls, prior in priors.items():
            log_posterior = _log_probability(prior)
            for feature, value in sample.items():
                # Columns the model has no table for are ignored.
                if feature not in likelihood_table:
                    continue
                likelihood = likelihood_table[feature][cls].get(value, epsilon)
                log_posterior += _log_probability(likelihood)
            log_posteriors[cls] = log_posterior
        predictions.append(max(log_posteriors, key=log_posteriors.get))
    return predictions
# Load the dataset and show its first rows.
df = pd.read_csv('heart_failure_clinical_records_dataset.csv')
print('Dataset Preview:')
print(df.head())

# Fit the model: class priors plus the per-feature likelihood table.
X, y = pre_processing(df)
priors = calculate_prior_probabilities(y)
likelihood_table = calculate_likelihoods(X, y)

# Score the model on the same rows it was fitted on.
predictions = predict_naive_bayes(X, priors, likelihood_table)
accuracy = np.mean(np.asarray(predictions) == y.to_numpy())
print(f'Training Accuracy: {accuracy:.2f}')

# Hand-written query rows; values are listed in the column order of X.
test_queries = pd.DataFrame(
    data=[
        [60, 1, 300, 0, 40, 1, 250000.0, 1.2, 135, 1, 0, 10],
        [75, 0, 800, 1, 25, 0, 220000.0, 2.0, 128, 0, 1, 5],
        [50, 1, 150, 0, 50, 1, 280000.0, 1.0, 140, 1, 1, 20],
    ],
    columns=X.columns,
)
predictions_test = predict_naive_bayes(
    test_queries, priors, likelihood_table
)

print('\nTest Predictions:')
for query, pred in zip(test_queries.to_numpy(), predictions_test):
    print(f'Query: {query} → Prediction: {pred}')
Dataset Preview:
age anaemia creatinine_phosphokinase diabetes
ejection_fraction \
0 75.0 0 582 0
20
1 55.0 0 7861 0
38
2 65.0 0 146 0
20
3 50.0 1 111 0
20
4 65.0 1 160 1
20
high_blood_pressure platelets serum_creatinine serum_sodium sex
\
0 1 265000.00 1.9 130 1
1 0 263358.03 1.1 136 1
2 0 162000.00 1.3 129 1
3 0 210000.00 1.9 137 1
4 0 327000.00 2.7 116 0
smoking time DEATH_EVENT
0 0 4 1
1 0 6 1
2 1 7 1
3 0 7 1
4 0 8 1
Training Accuracy: 0.98
Test Predictions:
Query: [6.00e+01 1.00e+00 3.00e+02 0.00e+00 4.00e+01 1.00e+00 2.50e+05
1.20e+00
1.35e+02 1.00e+00 0.00e+00 1.00e+01] → Prediction: 1
Query: [7.50e+01 0.00e+00 8.00e+02 1.00e+00 2.50e+01 0.00e+00 2.20e+05
2.00e+00
1.28e+02 0.00e+00 1.00e+00 5.00e+00] → Prediction: 1
Query: [5.0e+01 1.0e+00 1.5e+02 0.0e+00 5.0e+01 1.0e+00 2.8e+05
1.0e+00 1.4e+02
1.0e+00 1.0e+00 2.0e+01] → Prediction: 0