Program:
import pandas as pd

# Load the dataset from an Excel workbook
df = pd.read_excel('study.xlsx')
print(df)

# Drop rows that contain missing values
df.dropna(axis=0, inplace=True)
print(df)

# Alternatively, replace missing values with 0 instead of dropping rows
df = df.fillna(0)
print(df)
Output:
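Note: instead of dropping rows or filling with 0, missing numeric values are often filled with the column mean so the rest of the row is preserved. A minimal sketch of this variant, assuming the workbook contains a numeric column named 'Score' (a hypothetical name for illustration):
import pandas as pd

df = pd.read_excel('study.xlsx')
# 'Score' is a hypothetical column name; substitute a real numeric column
df['Score'] = df['Score'].fillna(df['Score'].mean())
print(df)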
Program:
import pandas as pd

# Sample dataset
data = {'Length': [5.1, 4.9, 4.7, 4.6, 5.0],
        'Width': [3.5, 3.0, 3.2, 3.1, 3.6],
        'Height': [1.4, 1.4, 1.3, 1.5, 1.4]}
df = pd.DataFrame(data)

df['Volume'] = df['Length'] * df['Width'] * df['Height']
print(df)
Output:
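Since Volume is computed element-wise, it behaves like any other column and can be summarized directly. A small follow-up sketch reusing df from above:
# Summary statistics for the derived column
print(df['Volume'].describe())

# Rows ordered by volume, largest first
print(df.sort_values('Volume', ascending=False))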
Program:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Synthetic data: y = 4 + 3x plus uniform noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.rand(100, 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

plt.scatter(X_train, y_train, label='Training Data')
plt.scatter(X_test, y_test, label='Test Data')
plt.plot(X_test, y_pred, color='red', linewidth=3, label='Regression Line')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
Output:
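Because the data were generated as y = 4 + 3x plus uniform noise on [0, 1), the fitted slope should come out near 3 and the intercept near 4.5 (4 plus the 0.5 mean of the noise). A minimal sketch for inspecting the fit, reusing model, y_test, and y_pred from above:
from sklearn.metrics import mean_squared_error, r2_score

print('Intercept:', model.intercept_)  # expect roughly 4.5
print('Slope:', model.coef_)           # expect roughly 3
print('MSE:', mean_squared_error(y_test, y_pred))
print('R^2:', r2_score(y_test, y_pred))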
Program:
from sklearn.datasets import make_classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X, y = make_classification(n_samples=100, n_features=2, n_informative=2,
                           n_redundant=0, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
Output:
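With n_neighbors=1 the model predicts from the single nearest training point, which fits noise easily; larger k values average over more neighbors. A minimal sketch comparing a few values of k on the same split, reusing the variables above:
for k in (1, 3, 5, 7):
    clf = KNeighborsClassifier(n_neighbors=k)
    clf.fit(X_train, y_train)
    print(f'k={k}: accuracy = {accuracy_score(y_test, clf.predict(X_test)):.2f}')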
Program:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# Generate 2-D blob data with 3 true clusters
# (2 features and 3 centers so the clusters can be plotted and recovered)
n_samples = 400
n_features = 2
n_clusters = 3

X, _ = make_blobs(n_samples=n_samples, n_features=n_features,
                  centers=n_clusters, random_state=42)

kmeans = KMeans(n_clusters=3)
kmeans.fit(X)
labels = kmeans.labels_
centers = kmeans.cluster_centers_

plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis')
plt.scatter(centers[:, 0], centers[:, 1], c='red', marker='x', s=200,
            label='Cluster Centers')
plt.legend()
plt.title('K-Means Clustering')
plt.show()
Output:
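K-Means needs the number of clusters up front; a common heuristic for choosing it is the elbow method, which plots the model's inertia_ (within-cluster sum of squared distances) against k and looks for the bend. A minimal sketch on the same X:
inertias = []
for k in range(1, 10):
    km = KMeans(n_clusters=k, n_init=10, random_state=42)
    km.fit(X)
    inertias.append(km.inertia_)

plt.plot(range(1, 10), inertias, marker='o')
plt.xlabel('Number of clusters k')
plt.ylabel('Inertia')
plt.title('Elbow Method')
plt.show()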
Program:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Synthetic two-feature binary classification data
X, y = make_classification(n_samples=100, n_features=2, n_classes=2,
                           n_clusters_per_class=1, n_redundant=0, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = LogisticRegression()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

conf_matrix = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix)

class_report = classification_report(y_test, y_pred)
print('Classification Report:')
print(class_report)

# Plot actual vs. predicted labels for the test points
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap='coolwarm', marker='o',
            s=50, label='Actual')
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred, cmap='coolwarm', marker='x',
            s=50, label='Predicted')
plt.legend(loc='best')
plt.title('Binary Classification - Logistic Regression')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')

# Shade the model's decision regions over a mesh grid
h = 0.02  # step size in the mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap='coolwarm', alpha=0.5)
plt.show()
Output:
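Logistic regression's predict() is effectively a 0.5 threshold on the class-1 probability the model estimates. A minimal sketch showing the probabilities behind the labels, reusing clf and X_test from above:
proba = clf.predict_proba(X_test)  # column 0: P(class 0), column 1: P(class 1)
print(proba[:5])

# Thresholding column 1 at 0.5 reproduces clf.predict
print((proba[:, 1] >= 0.5).astype(int)[:5])
print(clf.predict(X_test)[:5])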
Program:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

X, y = datasets.make_classification(n_samples=200, n_features=2, n_classes=2,
                                    n_clusters_per_class=1, n_redundant=0,
                                    random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
conf_matrix = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix)
class_report = classification_report(y_test, y_pred)
print('Classification Report:')
print(class_report)

def plot_decision_boundary(X, y, model, title):
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.6)
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolor='k')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title(title)

plt.figure(figsize=(10, 5))
plt.subplot(121)
plot_decision_boundary(X_train, y_train, svm_classifier, 'SVM Decision Boundary (Training Data)')
plt.subplot(122)
plot_decision_boundary(X_test, y_test, svm_classifier, 'SVM Decision Boundary (Testing Data)')
plt.tight_layout()
plt.show()
Output:
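The linear kernel above can only draw a straight boundary; an RBF kernel lets the same classifier fit curved ones. A minimal sketch reusing the train/test split and the plot_decision_boundary helper defined above:
rbf_classifier = SVC(kernel='rbf', gamma='scale')  # gamma='scale' is the sklearn default
rbf_classifier.fit(X_train, y_train)
print(f'RBF accuracy: {accuracy_score(y_test, rbf_classifier.predict(X_test)):.2f}')

plt.figure(figsize=(5, 5))
plot_decision_boundary(X_test, y_test, rbf_classifier, 'SVM Decision Boundary (RBF Kernel)')
plt.show()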