# Machine Learning Programs
# Decision Tree on Iris Dataset
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt

# Train an entropy-based decision tree on the Iris data, report its accuracy
# on a held-out split, and draw the fitted tree.
iris = load_iris()
X, y = iris.data, iris.target

# Features and labels are split together: passing only X would return two
# arrays and the four-way unpacking below would fail.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

clf = DecisionTreeClassifier(
    criterion='entropy', max_depth=3, random_state=42
)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n",
      classification_report(y_test, y_pred))

plt.figure(figsize=(12, 6))
# target_names is a NumPy array; plain lists are passed because some
# scikit-learn releases only accept lists for these two parameters.
plot_tree(
    clf,
    filled=True,
    feature_names=list(iris.feature_names),
    class_names=list(iris.target_names),
    rounded=True,
)
plt.show()
# Decision Tree on Breast Cancer Dataset
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt

# Train a Gini-based decision tree on the breast cancer data, report its
# accuracy on a held-out split, and draw the fitted tree.
data = load_breast_cancer()
X, y = data.data, data.target

# Split the features and the labels together (the original referenced an
# undefined name and omitted the labels).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

clf = DecisionTreeClassifier(criterion='gini', max_depth=4, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n",
      classification_report(y_test, y_pred))

plt.figure(figsize=(16, 8))
# feature_names and target_names are NumPy arrays here; plain lists are
# passed because some scikit-learn releases only accept lists for them.
plot_tree(
    clf,
    filled=True,
    feature_names=list(data.feature_names),
    class_names=list(data.target_names),
    rounded=True,
)
plt.show()
# Linear Regression on Housing Dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Fit a linear regression on Housing.csv and print error metrics for a
# held-out 20% split.
df = pd.read_csv("Housing.csv")

# One-hot encode the categorical columns; get_dummies needs the frame passed
# in explicitly.
df = pd.get_dummies(df)

# Drop one indicator column for each of these encoded features. The result is
# assigned back to df so the drop actually affects the features used below.
df = df.drop(
    [
        'mainroad_no',
        'guestroom_no',
        'basement_yes',
        'hotwaterheating_yes',
        'airconditioning_yes',
    ],
    axis=1,
)

# First column is the target, every remaining column is a feature.
x = df.iloc[:, 1:]
y = df.iloc[:, 0]

# Features and target are split together so the rows stay aligned.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

lm = LinearRegression()
lm.fit(x_train, y_train)
y_pred = lm.predict(x_test)

# Compute MSE once and derive RMSE from it.
mse = mean_squared_error(y_test, y_pred)
print("MSE:", mse)
print("RMSE:", np.sqrt(mse))
print("MAE:", mean_absolute_error(y_test, y_pred))
print("R2 Score:", r2_score(y_test, y_pred))
# Linear Regression on Marks Dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

# Fit a one-feature linear regression predicting the SEE column from the CIE
# column of marks_datasets.csv, print error metrics, and plot the fit.
df = pd.read_csv("marks_datasets.csv")
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would only add a stray "None" line.
df.info()

# Reshape both columns to (n_samples, 1) column vectors.
x = df['CIE'].values.reshape(-1, 1)
y = df['SEE'].values.reshape(-1, 1)

# Features and target are split together so the rows stay aligned.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

# The model is bound to the same name that fit/predict use below.
lm = LinearRegression()
lm.fit(x_train, y_train)
y_pred = lm.predict(x_test)

mse = mean_squared_error(y_test, y_pred)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("MSE:", mse)
print("R2 Score:", r2_score(y_test, y_pred))
print("RMSE:", np.sqrt(mse))

# Training points in green, fitted line over the test inputs in red
# ('f' is not a valid matplotlib colour code).
plt.scatter(x_train, y_train, color='g')
plt.plot(x_test, y_pred, color='r')
plt.show()