Thanks for visiting codestin.com
Credit goes to www.scribd.com

0% found this document useful (0 votes)
10 views · 2 pages

Assignment 2

Uploaded by

ironman150899
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
10 views · 2 pages

Assignment 2

Uploaded by

ironman150899
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/2

# Required Libraries

import numpy as np
import pandas as pd
# NOTE: load_boston was removed in scikit-learn 1.2; this script requires
# scikit-learn < 1.2 to run as written.
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import (
    KFold,
    RepeatedKFold,
    cross_val_score,
    train_test_split,
)

# Load dataset
# NOTE: load_boston only exists in scikit-learn < 1.2 (it was removed in 1.2).
boston = load_boston()
X = pd.DataFrame(boston.data, columns=boston.feature_names)
y = pd.Series(boston.target)

# Split data into training and testing sets (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Create a linear regression model and fit it on the training data
mlr_model = LinearRegression()
mlr_model.fit(X_train, y_train)

# Predict the values on the test data
y_pred = mlr_model.predict(X_test)

# Repeated Holdout with multiple iterations
n_repeats = 10
holdout_results = []

# Each repetition gets its own seed, its own split variables and its own
# model. Reusing X_train/X_test/y_test/y_pred/mlr_model here would overwrite
# the fixed random_state=42 split above, and the error metrics at the bottom
# of the script would then describe whichever random split happened to run
# last. Using the loop index as the seed keeps the splits distinct and the
# run reproducible.
for seed in range(n_repeats):
    X_tr, X_te, y_tr, y_te = train_test_split(
        X, y, test_size=0.3, random_state=seed
    )
    holdout_model = LinearRegression()
    holdout_model.fit(X_tr, y_tr)

    # Store the R^2 score of this repetition
    holdout_results.append(r2_score(y_te, holdout_model.predict(X_te)))

print("Average R^2 for Repeated Holdout: ", np.mean(holdout_results))


# k-Fold Cross Validation (with k=10)
# KFold defaults to shuffle=False, so folds are taken in dataset order.
# cross_val_score fits clones of the estimator, so mlr_model keeps the fit
# from the 70/30 split above.
kf = KFold(n_splits=10)
cv_results = cross_val_score(mlr_model, X, y, cv=kf, scoring='r2')

print("Average R^2 for k-Fold Cross-Validation: ", np.mean(cv_results))


# Error metrics for the fixed 70/30 split (random_state=42)
y_test_mean = np.mean(y_test)

# SSE (Sum of Squares of Errors)
sse = np.sum((y_test - y_pred) ** 2)

# SST (Total Sum of Squares)
sst = np.sum((y_test - y_test_mean) ** 2)

# SSR (Sum of Squares of Regression)
ssr = np.sum((y_pred - y_test_mean) ** 2)

# R^2 (Coefficient of Determination)
r2 = r2_score(y_test, y_pred)

# Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)

# Output the results
print(f"SSE: {sse}")
print(f"SSR: {ssr}")
print(f"SST: {sst}")
print(f"R^2: {r2}")
print(f"Mean Absolute Error: {mae}")

You might also like