AIML PROGRAMS
4) BAYESIAN NETWORK
PROGRAM:
# Install pgmpy first (notebook-style), then import what the program needs
!pip install pgmpy
import pandas as pd
import numpy as np
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
# Placeholder for fetching the Heart Disease dataset
def fetch_ucirepo(id):
    # Mock implementation returning a small sample of the dataset
    class Data:
        def __init__(self):
            self.features = pd.DataFrame({
                'age': [63, 67, 67, 37, 41, 56, 29],
                'sex': [1, 1, 1, 1, 0, 1, 1],
                'cp': [1, 4, 4, 3, 2, 2, 1],
                'trestbps': [145, 160, 120, 130, 130, 120, 120],
                'chol': [233, 286, 229, 250, 204, 236, 230],
                'fbs': ['?', 0, 0, 0, 0, 0, 0],
                'restecg': [2, 2, 2, 0, 2, 0, 2],
                'thalach': [150, 108, 129, 187, 172, 178, 171],
                'exang': [0, 1, 1, 0, 0, 0, 1],
                'oldpeak': [2.3, 1.5, 2.6, 3.5, 1.4, 0.8, 1.2],
                'slope': [3, 2, 2, 3, 1, 1, 1],
                'ca': ['0', '3', '2', '0', '0', '?', '0'],
                'thal': ['6', '3', '7', '3', '3', '3', '3']
            })
            # Only six labels: the seventh row gets NaN on assignment below
            # and is removed by dropna()
            self.targets = pd.Series([0, 1, 1, 0, 0, 1], name='target')
    return Data()
# Fetching the Heart Disease dataset
heart_disease = fetch_ucirepo(id=45)
X = heart_disease.features
y = heart_disease.targets
# Combine features and targets into a single DataFrame
data = X.copy()
data['target'] = y
# Replace '?' with NaN and convert columns to appropriate types
data.replace('?', np.nan, inplace=True)
data = data.astype(float)
# Drop rows with NaN values (or you can use imputation)
data.dropna(inplace=True)
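# Alternative to dropping rows (a sketch, not used for the output below):
# fill missing values with each column's median instead.
# data.fillna(data.median(), inplace=True)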
# Define the structure of the Bayesian Network
model = BayesianNetwork([
    ('age', 'trestbps'),
    ('sex', 'trestbps'),
    ('cp', 'target'),
    ('trestbps', 'target'),
    ('chol', 'target'),
    ('fbs', 'target'),
    ('restecg', 'target'),
    ('thalach', 'target'),
    ('exang', 'target'),
    ('oldpeak', 'target'),
    ('slope', 'target'),
    ('ca', 'target'),
    ('thal', 'target')
])
# Train the model using Maximum Likelihood Estimation
model.fit(data, estimator=MaximumLikelihoodEstimator)
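# Optional check (a sketch, not reflected in the output below): inspect the
# learned CPD of a node, e.g. the conditional distribution of 'target'.
# print(model.get_cpds('target'))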
# Perform inference
infer = VariableElimination(model)
# Verify model nodes
print("Model nodes:", model.nodes())
print("Unique values of 'age':", data['age'].unique())
# Example: Query the probability of having heart disease (target=1) given some evidence
try:
    # Evidence values must match the states seen during fitting; after
    # astype(float) the states are floats, so floats are passed here
    q = infer.query(variables=['target'], evidence={'age': 37.0, 'sex': 1.0, 'cp': 2.0})
    print(q)
except KeyError as e:
    print(f"KeyError: {e}. Make sure the evidence variables are part of the model.")
except ValueError as e:
    print(f"ValueError: {e}. Check the values and data types of your evidence.")
OUTPUT:
Model nodes: ['age', 'trestbps', 'sex', 'cp', 'target', 'chol', 'fbs', 'restecg', 'thalach', 'exang',
'oldpeak', 'slope', 'ca', 'thal']
Unique values of 'age': [67. 37. 41.]
+-------------+---------------+
| target      |   phi(target) |
+=============+===============+
| target(0.0) |        0.5003 |
+-------------+---------------+
| target(1.0) |        0.4997 |
+-------------+---------------+
10) Bayesian Network EM Algorithm
PROGRAM:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import ExpectationMaximization as EM
# Generate synthetic binary data for the observed variables A, C and D
data = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 3)), columns=['A', 'C', 'D'])
# Define the Bayesian Network with a latent variable 'B'
model = BayesianNetwork([('A', 'B'), ('C', 'B'), ('C', 'D')], latents={'B'})
# Estimate all model parameters (CPDs) using Expectation Maximization,
# giving the latent variable 'B' a cardinality of 3
estimator = EM(model, data)
estimated_cpds = estimator.get_parameters(latent_card={'B': 3})
# Print the estimated CPDs for verification
for cpd in estimated_cpds:
    print(cpd)
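# Optional follow-up (a sketch, not part of the recorded output): attach the
# learned CPDs to the model and verify the parameterization is consistent.
# model.add_cpds(*estimated_cpds)
# print(model.check_model())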
OUTPUT:
+------+-------+
| A(0) | 0.496 |
+------+-------+
| A(1) | 0.504 |
+------+-------+
+------+-------+
| C(0) | 0.486 |
+------+-------+
| C(1) | 0.514 |
+------+-------+
+------+---------------------+--------------------+
| C | C(0) | C(1) |
+------+---------------------+--------------------+
| D(0) | 0.5020576131687243 | 0.5038910505836576 |
+------+---------------------+--------------------+
| D(1) | 0.49794238683127573 | 0.4961089494163424 |
+------+---------------------+--------------------+
+------+---------------------+---------------------+---------------------+---------------------+
| A    | A(0)                | A(0)                | A(1)                | A(1)                |
+------+---------------------+---------------------+---------------------+---------------------+
| C    | C(0)                | C(1)                | C(0)                | C(1)                |
+------+---------------------+---------------------+---------------------+---------------------+
| B(0) | 0.32123038969269785 | 0.15257647518865883 | 0.33298788674129215 | 0.5462520622273568  |
+------+---------------------+---------------------+---------------------+---------------------+
| B(1) | 0.209862688472388   | 0.5026878442568229  | 0.17750899501100845 | 0.16049969699143238 |
+------+---------------------+---------------------+---------------------+---------------------+
| B(2) | 0.46890692183491417 | 0.3447356805545183  | 0.4895031182476994  | 0.29324824078121087 |
+------+---------------------+---------------------+---------------------+---------------------+
5) Build Regression models
PROGRAM:
import pandas as pd
import numpy as np
# Set the random seed for reproducibility
np.random.seed(42)
# Generate example data
num_samples = 100
feature1 = np.random.rand(num_samples) * 10
feature2 = np.random.rand(num_samples) * 20
feature3 = np.random.rand(num_samples) * 30
noise = np.random.randn(num_samples) * 5
# Define a linear relationship with some noise
target = 2 * feature1 + 3 * feature2 + 4 * feature3 + noise
# Create a DataFrame
df = pd.DataFrame({
'feature1': feature1,
'feature2': feature2,
'feature3': feature3,
'target': target
})
# Save the DataFrame to a CSV file
df.to_csv('dataset.csv', index=False)
# Print the first few rows of the dataset
print(df.head())
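# Second script: train and evaluate a linear regression model on the saved CSV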
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
# Load the dataset
df = pd.read_csv('dataset.csv')
# Define the features and target variable
X = df[['feature1', 'feature2', 'feature3']]
y = df['target']
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train the regression model
reg = LinearRegression()
reg.fit(X_train, y_train)
# Make predictions on the test set
y_pred = reg.predict(X_test)
# Evaluate the model
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
print('Coefficient of determination (R^2 score): %.2f' % r2_score(y_test, y_pred))
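# Optional sanity check (a sketch, not in the recorded output): the fitted
# coefficients should be close to the generating weights [2, 3, 4] used above.
# print('Coefficients:', reg.coef_, 'Intercept:', reg.intercept_)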
# Plot the results
# Assuming 'feature1' is the feature to be plotted against the target
plt.scatter(X_test['feature1'], y_test, color='black', label='Actual')
plt.scatter(X_test['feature1'], y_pred, color='blue', label='Predicted')
plt.xlabel('feature1')
plt.ylabel('target')
plt.title('Actual vs Predicted values')
plt.legend()
plt.show()
OUTPUT:
feature1 feature2 feature3 target
0 3.745401 0.628584 19.260949 86.648210
1 9.507143 12.728208 2.524199 64.037705
2 7.319939 6.287120 4.848861 63.616404
3 5.986585 10.171414 26.956626 153.483509
4 1.560186 18.151329 18.192872 120.220135
Mean squared error: 52.54
Coefficient of determination (R^2 score): 0.97
6) Build decision trees and random forests
PROGRAM:
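# Note: this program assumes an existing 'example_data.csv' with a 'target'
# column. A minimal sketch to create one (hypothetical file contents, in the
# same style as the regression program above):
# import numpy as np, pandas as pd
# rng = np.random.default_rng(42)
# df = pd.DataFrame(rng.random((200, 3)) * 10, columns=['feature1', 'feature2', 'feature3'])
# df['target'] = 2 * df['feature1'] + 3 * df['feature2'] + 4 * df['feature3'] + rng.normal(0, 5, 200)
# df.to_csv('example_data.csv', index=False)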
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Load data
data = pd.read_csv('example_data.csv')
# Print the first few rows of the DataFrame
print(data.head())
# Split data into features and target
X = data.drop(['target'], axis=1)
y = data['target']
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Build and train Decision Tree Regressor
dt = DecisionTreeRegressor()
dt.fit(X_train, y_train)
# Predict on test set using Decision Tree
y_pred_dt = dt.predict(X_test)
# Evaluate performance of Decision Tree
mse_dt = mean_squared_error(y_test, y_pred_dt)
print(f"Decision Tree Mean Squared Error: {mse_dt:.4f}")
# Build and train Random Forest Regressor
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
# Predict on test set using Random Forest
y_pred_rf = rf.predict(X_test)
# Evaluate performance of Random Forest
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f"Random Forest Mean Squared Error: {mse_rf:.4f}")