SUPERVISED LEARNING EXAMPLE
from sklearn.datasets import load_iris
# Load Iris data
iris = load_iris()
X = iris.data
y = iris.target
# Feature names and target classes
print("Feature names:", iris.feature_names)
print("Target names:", iris.target_names)
# Sample data
print("First 5 samples:\n", X[:5])
print("First 5 targets:\n", y[:5])
import pandas as pd
df = pd.DataFrame(X, columns=iris.feature_names)
df['species'] = [iris.target_names[i] for i in y]
# Display first few rows
print(df.head())
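Before modeling, it is worth confirming the class balance. A quick check on the DataFrame built above (iris ships with 50 samples per species):
# Count samples per species -- the iris dataset is balanced
print(df['species'].value_counts())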
from sklearn.model_selection import train_test_split
# 80% train, 20% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
from sklearn.tree import DecisionTreeClassifier
# Train a decision tree on the scaled features
model = DecisionTreeClassifier()
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred,
target_names=iris.target_names))
# Drop 'sepal width (cm)' (index 1); keep indices 0, 2, 3
X_reduced = X[:, [0, 2, 3]]
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(X_reduced, y, test_size=0.2, random_state=42)
scaler_r = StandardScaler()
X_train_r_scaled = scaler_r.fit_transform(X_train_r)
X_test_r_scaled = scaler_r.transform(X_test_r)
model_r = DecisionTreeClassifier()
model_r.fit(X_train_r_scaled, y_train_r)
y_pred_r = model_r.predict(X_test_r_scaled)
print("Accuracy without 'sepal width':", accuracy_score(y_test_r, y_pred_r))
# Retrain on the raw (unscaled) features for comparison
model_ns = DecisionTreeClassifier()
model_ns.fit(X_train, y_train)
y_pred_ns = model_ns.predict(X_test)
print("Accuracy without scaling:", accuracy_score(y_test, y_pred_ns))
import seaborn as sns
import matplotlib.pyplot as plt
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, cmap='Blues', xticklabels=iris.target_names,
            yticklabels=iris.target_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()
UNSUPERVISED LEARNING EXAMPLE
# Step 1: Load libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# Step 2: Load the Iris dataset (without using target labels)
iris = load_iris()
X = iris.data
feature_names = iris.feature_names
df = pd.DataFrame(X, columns=feature_names)
# Step 3: Use KMeans clustering
kmeans = KMeans(n_clusters=3, random_state=42)
df['cluster'] = kmeans.fit_predict(X)
# Step 4: Reduce dimensions for visualization using PCA
pca = PCA(n_components=2)
components = pca.fit_transform(X)
df['PC1'] = components[:, 0]
df['PC2'] = components[:, 1]
# Step 5: Plot the clusters
sns.scatterplot(data=df, x='PC1', y='PC2', hue='cluster', palette='Set1')
plt.title("K-Means Clustering (Unsupervised)")
plt.show()
# Optional: Add actual labels for visual comparison
df['actual'] = iris.target
# Compare real species and predicted clusters
sns.scatterplot(data=df, x='PC1', y='PC2', hue='actual', palette='Set2')
plt.title("Actual Iris Species")
plt.show()
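The two plots invite a visual comparison, but agreement between clusters and species can also be quantified. A minimal sketch using the adjusted Rand index (1.0 means a perfect match, values near 0 mean chance-level agreement):
from sklearn.metrics import adjusted_rand_score
# Compare K-Means assignments against the true species labels
print("Adjusted Rand Index:", adjusted_rand_score(df['actual'], df['cluster']))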
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# Step 2: Load the Iris dataset
iris = load_iris()
X = iris.data
feature_names = iris.feature_names
df = pd.DataFrame(X, columns=feature_names)
# Helper: project features onto the first two principal components
def add_pca(df, X):
    pca = PCA(n_components=2)
    components = pca.fit_transform(X)
    df['PC1'] = components[:, 0]
    df['PC2'] = components[:, 1]
    return df
# Try several cluster counts and plot each result
for k in [2, 3, 4, 5]:
    kmeans = KMeans(n_clusters=k, random_state=42)
    df_k = df.copy()
    df_k['cluster'] = kmeans.fit_predict(X)
    df_k = add_pca(df_k, X)
    plt.figure()
    sns.scatterplot(data=df_k, x='PC1', y='PC2', hue='cluster', palette='Set1')
    plt.title(f"KMeans Clustering with k={k}")
    plt.show()
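The loop above compares values of k visually; the elbow method gives a numeric guide by tracking inertia (within-cluster sum of squared distances) as k grows and looking for the bend. A minimal sketch:
# Elbow method: inertia drops quickly until the 'right' k, then flattens
inertias = []
ks = range(1, 9)
for k in ks:
    inertias.append(KMeans(n_clusters=k, random_state=42).fit(X).inertia_)
plt.figure()
plt.plot(list(ks), inertias, marker='o')
plt.xlabel('Number of clusters k')
plt.ylabel('Inertia')
plt.title('Elbow Method')
plt.show()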
# Agglomerative (hierarchical) clustering with 3 clusters
agg = AgglomerativeClustering(n_clusters=3)
df_agg = df.copy()
df_agg['cluster'] = agg.fit_predict(X)
df_agg = add_pca(df_agg, X)
plt.figure()
sns.scatterplot(data=df_agg, x='PC1', y='PC2', hue='cluster', palette='Set2')
plt.title("Agglomerative Clustering")
plt.show()
# Standardize data first (important for DBSCAN)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
dbscan = DBSCAN(eps=0.8, min_samples=5)
df_db = df.copy()
df_db['cluster'] = dbscan.fit_predict(X_scaled)
df_db = add_pca(df_db, X_scaled)
plt.figure()
sns.scatterplot(data=df_db, x='PC1', y='PC2', hue='cluster', palette='Set3')
plt.title("DBSCAN Clustering")
plt.show()
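Unlike KMeans, DBSCAN can leave points unassigned: they receive the label -1 (noise). A quick count of cluster sizes, including noise, for the result above:
# Cluster sizes; -1 marks noise points DBSCAN did not assign to any cluster
print(df_db['cluster'].value_counts())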
# Cluster using only the two petal features
X_petal = df[['petal length (cm)', 'petal width (cm)']].values
kmeans_petal = KMeans(n_clusters=3, random_state=42)
df_petal = df.copy()
df_petal['cluster'] = kmeans_petal.fit_predict(X_petal)
df_petal = add_pca(df_petal, X_petal)
plt.figure()
sns.scatterplot(data=df_petal, x='PC1', y='PC2', hue='cluster', palette='coolwarm')
plt.title("KMeans on Petal Features Only")
plt.show()
df_labels = add_pca(df.copy(), X)
df_labels['label'] = iris.target
plt.figure()
sns.scatterplot(data=df_labels, x='PC1', y='PC2', hue='label', palette='Dark2')
plt.title("Actual Iris Species Labels")
plt.show()
ENSEMBLE METHODS
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier,
                              GradientBoostingClassifier, VotingClassifier)
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Bagging: Random Forest averages many decorrelated trees
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print("🌳 Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
# Boosting: AdaBoost fits weak learners sequentially, reweighting misclassified samples
ada = AdaBoostClassifier(n_estimators=50, random_state=42)
ada.fit(X_train, y_train)
y_pred_ada = ada.predict(X_test)
print("⚡ AdaBoost Accuracy:", accuracy_score(y_test, y_pred_ada))
# Boosting: Gradient Boosting fits each new tree to the previous residuals
gb = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb.fit(X_train, y_train)
y_pred_gb = gb.predict(X_test)
print("📈 Gradient Boosting Accuracy:", accuracy_score(y_test, y_pred_gb))
# Voting: combine heterogeneous models
log_clf = LogisticRegression(max_iter=200)
svc_clf = SVC(probability=True)  # probability=True is needed for 'soft' voting
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rf), ('svc', svc_clf)],
    voting='hard'  # can also be 'soft'
)
voting_clf.fit(X_train, y_train)
y_pred_vote = voting_clf.predict(X_test)
print("Voting Classifier Accuracy:", accuracy_score(y_test, y_pred_vote))
MODEL EVALUATION
from sklearn.datasets import load_iris, load_diabetes
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import (accuracy_score, confusion_matrix, classification_report,
                             roc_curve, auc)
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Load the iris dataset
iris = load_iris()
X, y = iris.data, iris.target
# Binary classification for the ROC demo: class 2 (virginica) vs the rest
y_binary = (y == 2).astype(int)
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.3, random_state=42)
# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("✅ Accuracy Score:", acc)
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
print("📊 Classification Report:")
print(classification_report(y_test, y_pred))
# Probability of the positive class, needed for the ROC curve
y_proba = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}")
plt.plot([0, 1], [0, 1], linestyle='--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend()
plt.show()
# Load diabetes dataset (regression)
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Train a linear regression model
reg = LinearRegression()
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print(f"📈 MAE: {mae:.2f}")
print(f"📉 MSE: {mse:.2f}")
print(f"📏 RMSE: {rmse:.2f}")
print(f"🔢 R² Score: {r2:.2f}")
NEURAL NETWORK
# Example 1: Simple Classification (Digits Dataset)
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
# Load the dataset
digits = load_digits()
X, y = digits.data, digits.target
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Build the MLP model
mlp = MLPClassifier(hidden_layer_sizes=(32,), max_iter=300, random_state=1)
mlp.fit(X_train, y_train)
# Predict and evaluate accuracy
y_pred = mlp.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# Example 2: The XOR Problem (a two-input logical function)
import numpy as np
from sklearn.neural_network import MLPClassifier
# XOR data
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([0, 1, 1, 0])
# Model
model = MLPClassifier(hidden_layer_sizes=(4,), activation='tanh', max_iter=1000)
model.fit(X, y)
# Predict
print("Predictions:", model.predict(X))
# Example 3: Single-Input Regression (the sine function)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor
# Generate sin(x) data
X = np.linspace(0, 2*np.pi, 100).reshape(-1, 1)
y = np.sin(X).ravel()
# MLP Regressor
mlp = MLPRegressor(hidden_layer_sizes=(10,10), max_iter=1000)
mlp.fit(X, y)
# Tahmin & Görselleştirme
y_pred = mlp.predict(X)
plt.plot(X, y, label='Actual')
plt.plot(X, y_pred, label='Prediction')
plt.legend()
plt.title("sin(x) regression")
plt.show()
# Example 4: Multi-Layer Perceptron (MLP) – Wine Dataset
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
# Dataset
data = load_wine()
X, y = data.data, data.target
# Standardization
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=0)
# MLP model
clf = MLPClassifier(hidden_layer_sizes=(50,), max_iter=500)
clf.fit(X_train, y_train)
# Results
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
# Example 5: A minimal network by hand: the AND problem (sigmoid activation, manual weight updates)
import numpy as np
# Sigmoid activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_deriv(x):
    return sigmoid(x) * (1 - sigmoid(x))
# Input data - the AND problem
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # 4 samples
y = np.array([[0], [0], [0], [1]])  # target outputs
# Initialize weights and bias randomly
np.random.seed(0)
weights = np.random.rand(2, 1)  # 2 inputs -> 1 output
bias = np.random.rand(1)
# Training parameters
lr = 0.1  # learning rate
epochs = 10000  # number of passes over the data
# Training loop
for epoch in range(epochs):
    # 1. Forward propagation
    z = np.dot(X, weights) + bias
    output = sigmoid(z)
    # 2. Compute the error
    error = y - output
    # 3. Backpropagation
    d_weights = np.dot(X.T, error * sigmoid_deriv(z))
    d_bias = np.sum(error * sigmoid_deriv(z))
    # 4. Update weights and bias
    weights += lr * d_weights
    bias += lr * d_bias
    # Report progress (optional)
    if epoch % 2000 == 0:
        loss = np.mean(np.abs(error))
        print(f"Epoch {epoch}, Loss: {loss:.4f}")
# Print the results
print("\nResults:")
for i in range(len(X)):
    result = sigmoid(np.dot(X[i], weights) + bias)
    print(f"input: {X[i]}, prediction: {result[0]:.4f}")
TIME SERIES AND FORECASTING
# Example 1: ARIMA Forecasting with Airline Dataset
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.datasets import get_rdataset
# Load dataset
data = get_rdataset('AirPassengers').data
data['time'] = pd.date_range(start='1949-01', periods=len(data), freq='M')
data.set_index('time', inplace=True)
# Plot
data.plot(title='Monthly Air Passengers')
plt.ylabel('Passengers')
plt.show()
# Fit ARIMA Model
model = ARIMA(data['value'], order=(2, 1, 2)) # ARIMA(p,d,q)
model_fit = model.fit()
print(model_fit.summary())
# Forecast
forecast = model_fit.forecast(steps=12)
forecast.plot(title="12-Month Forecast")
plt.show()
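Plotting the forecast alone hides how it extends the history. A minimal sketch overlaying the two (statsmodels returns the forecast as a pandas Series whose date index continues the original one):
# Overlay the 12-month forecast on the observed series
plt.plot(data.index, data['value'], label='Observed')
plt.plot(forecast.index, forecast, label='Forecast', linestyle='--')
plt.legend()
plt.title('AirPassengers: Observed vs. ARIMA Forecast')
plt.show()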
# Example 2: Forecasting with Prophet
!pip install prophet
import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt
# Load dataset
df = pd.read_csv("https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv")
df.columns = ['ds', 'y']  # Prophet expects columns named 'ds' (date) and 'y' (value)
# Fit model
model = Prophet()
model.fit(df)
# Forecast next 12 months
future = model.make_future_dataframe(periods=12, freq='M')
forecast = model.predict(future)
# Plot
model.plot(forecast)
plt.title('Forecast with Prophet')
plt.show()
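Prophet can also decompose its forecast into trend and seasonal components, which is often more informative than the combined curve. A short sketch using Prophet's plot_components:
# Decompose the forecast into trend and yearly seasonality
model.plot_components(forecast)
plt.show()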
# Example 3: Machine Learning Approach (Random Forest)
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
# Load data
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-
passengers.csv"
df = pd.read_csv(url)
df.columns = ['Month', 'Passengers']
df['Month'] = pd.to_datetime(df['Month'])
df.set_index('Month', inplace=True)
# Create lag features: the previous 12 months as predictors
for lag in range(1, 13):
    df[f'lag_{lag}'] = df['Passengers'].shift(lag)
df.dropna(inplace=True)
# Split data
train = df.iloc[:-12]
test = df.iloc[-12:]
X_train = train.drop('Passengers', axis=1)
y_train = train['Passengers']
X_test = test.drop('Passengers', axis=1)
y_test = test['Passengers']
# Train model
model = RandomForestRegressor(n_estimators=100)
model.fit(X_train, y_train)
# Predict
preds = model.predict(X_test)
# Plot
plt.plot(y_test.index, y_test, label='Actual')
plt.plot(y_test.index, preds, label='Predicted')
plt.legend()
plt.title('Random Forest Forecasting')
plt.show()
# RMSE (computed via np.sqrt for compatibility across scikit-learn versions)
print("RMSE:", np.sqrt(mean_squared_error(y_test, preds)))