Thanks for visiting codestin.com
Credit goes to www.scribd.com

0% found this document useful (0 votes)
58 views7 pages

Machine Learning Regression Lab Tasks

The document outlines a lab assignment focused on machine learning techniques, specifically involving linear and polynomial regression models. It includes tasks such as loading datasets, training models, evaluating performance, and visualizing results. The assignment is submitted by a student named Muhammad Shaheer Ali and is due on December 18, 2024.

Uploaded by

shaheeralik2005
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
58 views7 pages

Machine Learning Regression Lab Tasks

The document outlines a lab assignment focused on machine learning techniques, specifically involving linear and polynomial regression models. It includes tasks such as loading datasets, training models, evaluating performance, and visualizing results. The assignment is submitted by a student named Muhammad Shaheer Ali and is due on December 18, 2024.

Uploaded by

shaheeralik2005
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 7

NATIONAL UNIVERSITY OF SCIENCES AND

TECHNOLOGY

APPLICATION OF ICT
LAB ASSIGNMENT #11
MACHINE LEARNING PART 3

SUBMITTED TO: MR. MUHAMMAD ADNAN


DATE OF SUBMISSION: 18 December 2024

NAME                         CLASS      CMS ID

Muhammad Shaheer Ali Khan    BEE-16D    509801

1
LAB TASKS:
1. Load the student scores dataset and inspect its structure.
2. Train a simple linear regression model to predict scores and visualize the regression line.
3. Use the California Housing dataset to train a multiple regression model and evaluate its
performance.
4. Train a polynomial regression model on student scores and compare it with the linear
model.
5. Explore the impact of varying test splits (20%, 30%, 40%) on model performance.

Code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.datasets import fetch_california_housing

def load_and_inspect_student_data(url="https://bit.ly/3bD4VXe"):
    """Task 1: Load the student scores dataset and print a quick overview.

    Args:
        url: Source of the CSV data. It is handed straight to
            ``pandas.read_csv``, so a URL, a local path, or a file-like
            object all work.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    print("\n=== Task 1: Loading and Inspecting Student Data ===")
    stud_scores = pd.read_csv(url)

    print("Dataset Head:")
    print(stud_scores.head())

    print("\nDataset Info:")
    # DataFrame.info() writes its report itself and returns None; wrapping it
    # in print() would append a stray "None" line after the summary.
    stud_scores.info()

    return stud_scores

def simple_linear_regression(data):
    """Task 2: Fit a one-feature linear model of Scores on Hours.

    The data is split 70/30 with a fixed seed, the model is fitted on the
    training part, its slope, intercept, MSE and R-squared on the held-out
    part are printed, and the test points are plotted with the fitted line.

    Args:
        data: Table with ``'Hours'`` and ``'Scores'`` columns.

    Returns:
        Tuple ``(fitted model, test MSE, test R-squared)``.
    """
    print("\n=== Task 2: Simple Linear Regression ===")

    # scikit-learn expects a 2-D feature matrix, hence the single-column reshape.
    hours = data['Hours'].to_numpy().reshape(-1, 1)
    scores = data['Scores'].to_numpy()

    hours_train, hours_test, scores_train, scores_test = train_test_split(
        hours, scores, test_size=0.3, random_state=42
    )

    # fit() returns the estimator itself, so construction and training chain.
    model = LinearRegression().fit(hours_train, scores_train)
    predicted = model.predict(hours_test)

    mse = mean_squared_error(scores_test, predicted)
    r2 = r2_score(scores_test, predicted)

    slope = model.coef_[0]
    intercept = model.intercept_
    print(f"Slope: {slope:.4f}")
    print(f"Intercept: {intercept:.4f}")
    print(f"Mean Squared Error: {mse:.4f}")
    print(f"R-squared Score: {r2:.4f}")

    # Held-out observations as points, model predictions as the line.
    plt.figure(figsize=(10, 6))
    plt.scatter(hours_test, scores_test, color='blue', label='Actual')
    plt.plot(hours_test, predicted, color='red', label='Regression Line')
    plt.xlabel('Hours Studied')
    plt.ylabel('Scores')
    plt.title('Simple Linear Regression: Hours vs Scores')
    plt.legend()
    plt.show()

    return model, mse, r2

def multiple_regression():
    """Task 3: Fit a multi-feature linear model on California Housing.

    Fetches the dataset, splits it 70/30 with a fixed seed, fits a linear
    regression on all features, and prints the feature names, the test MSE
    and R-squared, and one fitted coefficient per feature.

    Returns:
        Tuple ``(fitted model, test MSE, test R-squared)``.
    """
    print("\n=== Task 3: Multiple Regression ===")

    dataset = fetch_california_housing()
    feature_names = dataset.feature_names

    features_train, features_test, target_train, target_test = train_test_split(
        dataset.data, dataset.target, test_size=0.3, random_state=42
    )

    # fit() returns the estimator itself, so construction and training chain.
    model = LinearRegression().fit(features_train, target_train)

    predicted = model.predict(features_test)
    mse = mean_squared_error(target_test, predicted)
    r2 = r2_score(target_test, predicted)

    print("Feature Names:", feature_names)
    print(f"Mean Squared Error: {mse:.4f}")
    print(f"R-squared Score: {r2:.4f}")

    # One line per feature: its name and the coefficient the model learned.
    for name, coef in zip(feature_names, model.coef_):
        print(f"{name}: {coef:.4f}")

    return model, mse, r2

def polynomial_regression(data, degree=2):
    """Task 4: Fit a polynomial regression of Scores on Hours.

    The data is split 70/30 with a fixed seed. A pipeline of
    ``PolynomialFeatures`` followed by ``LinearRegression`` is fitted on the
    training part, its test MSE and R-squared are printed, and the test
    points are plotted together with the fitted curve.

    Args:
        data: Table with ``'Hours'`` and ``'Scores'`` columns.
        degree: Degree of the polynomial feature expansion. Defaults to 2.

    Returns:
        Tuple ``(fitted pipeline, test MSE, test R-squared)``.
    """
    print("\n=== Task 4: Polynomial Regression ===")

    # scikit-learn expects a 2-D feature matrix, hence the single-column reshape.
    X = data['Hours'].to_numpy().reshape(-1, 1)
    y = data['Scores'].to_numpy()

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    poly_model = make_pipeline(PolynomialFeatures(degree=degree), LinearRegression())
    poly_model.fit(X_train, y_train)

    y_poly_pred = poly_model.predict(X_test)
    mse_poly = mean_squared_error(y_test, y_poly_pred)
    r2_poly = r2_score(y_test, y_poly_pred)

    print(f"Polynomial Mean Squared Error: {mse_poly:.4f}")
    print(f"Polynomial R-squared Score: {r2_poly:.4f}")

    plt.figure(figsize=(10, 6))
    plt.scatter(X_test, y_test, color='blue', label='Actual')

    # Evaluate the model on a dense, evenly spaced grid spanning the test
    # range. Connecting only the few test points draws a jagged polyline
    # instead of the actual fitted curve.
    X_curve = np.linspace(X_test.min(), X_test.max(), 200).reshape(-1, 1)
    y_curve = poly_model.predict(X_curve)
    plt.plot(X_curve, y_curve, color='red', label='Polynomial Regression')

    plt.xlabel('Hours Studied')
    plt.ylabel('Scores')
    plt.title('Polynomial Regression: Hours vs Scores')
    plt.legend()
    plt.show()

    return poly_model, mse_poly, r2_poly

def test_split_impact(data, test_sizes=(0.2, 0.3, 0.4)):
    """Task 5: Compare linear-regression metrics across test split sizes.

    For each fraction in ``test_sizes`` the data is split with a fixed seed,
    a linear regression of Scores on Hours is fitted on the training part,
    and the test MSE and R-squared are printed and recorded.

    Args:
        data: Table with ``'Hours'`` and ``'Scores'`` columns.
        test_sizes: Iterable of test-set fractions to evaluate. Defaults to
            20%, 30% and 40%.

    Returns:
        A list with one dict per split, holding the keys ``'test_size'``,
        ``'mse'`` and ``'r2'``, in the order the sizes were given.
    """
    print("\n=== Task 5: Impact of Different Test Split Sizes ===")

    # scikit-learn expects a 2-D feature matrix, hence the single-column reshape.
    X = data['Hours'].to_numpy().reshape(-1, 1)
    y = data['Scores'].to_numpy()

    results = []
    for test_size in test_sizes:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42
        )

        regressor = LinearRegression()
        regressor.fit(X_train, y_train)
        y_pred = regressor.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        results.append({
            'test_size': test_size,
            'mse': mse,
            'r2': r2,
        })

        # Fixed precision keeps float noise out of the label: 0.29 * 100 is
        # 28.999999999999996 when formatted without a precision.
        print(f"\nTest Size: {test_size * 100:.1f}%")
        print(f"Mean Squared Error: {mse:.4f}")
        print(f"R-squared Score: {r2:.4f}")

    return results

def main():
    """Run lab tasks 1 through 5 in order.

    The student scores table loaded by task 1 is passed on to tasks 2, 4
    and 5; task 3 takes no input.
    """
    student_scores = load_and_inspect_student_data()

    simple_linear_regression(student_scores)
    multiple_regression()
    polynomial_regression(student_scores)
    test_split_impact(student_scores)

# Run the lab only when this file is executed as a script, not when imported.
# The module attribute is the dunder `__name__`, compared against
# "__main__"; a single-underscore `_name_` is undefined and raises NameError.
if __name__ == "__main__":
    main()

OUTPUT:
Attached in zip

You might also like