# Develop a program to implement Principal Component Analysis (PCA) for
# reducing the dimensionality of the Iris dataset from 4 features to 2.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
# Load the Iris dataset into a DataFrame with one column per feature.
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['Species'] = data.target  # Add the integer target labels
# Map the integer target values to species names so the labels are readable.
species_names = dict(enumerate(data.target_names))
df['Species'] = df['Species'].map(species_names)
# Separate the numeric feature columns from the label column, then
# standardize them so that no feature dominates PCA through its scale alone.
df_features = df.drop(columns='Species')
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df_features)

# Project the standardized 4-feature data onto its first 2 principal components.
pca = PCA(n_components=2)
principal_components = pca.fit_transform(scaled_features)
# Collect the two projected coordinates in a DataFrame and attach the
# species label of each sample alongside them.
pca_df = pd.DataFrame(principal_components, columns=['PC1', 'PC2'])
pca_df['Species'] = df['Species']
# Preview the first rows explicitly: a bare `pca_df` expression is a no-op
# when this file runs as a script (it only renders at the end of a notebook cell).
print(pca_df.head())
# Scatter the samples in the PC1/PC2 plane, coloured by species.
plt.figure(figsize=(8, 6))
ax = sns.scatterplot(
    data=pca_df,
    x='PC1',
    y='PC2',
    hue='Species',
    palette='viridis',
    s=100,
    edgecolor='black',
)
# Label the axes and legend through the Axes object seaborn drew on.
ax.set_title('PCA of Iris Dataset (2 Components)')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.legend(title='Species')
ax.grid(True)
plt.show()
# Report the fraction of total variance captured by each retained component.
explained_variance = pca.explained_variance_ratio_
print("Explained variance by each principal component:", explained_variance)