# 1. Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
# 2. Load dataset using read_csv
df = pd.read_csv('iris.csv')  # <-- your file name
# 3. Separate features (assuming 'species' is the label column)
X = df.drop('species', axis=1)
# 4. Scale the features so that each column contributes on a comparable
#    scale to the covariance matrix computed below
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# 5. Calculate covariance matrix
#    np.cov treats each ROW as a variable, hence the transpose.
cov_matrix = np.cov(X_scaled.T)
# 6. Calculate eigenvalues and eigenvectors
#    A covariance matrix is symmetric, so use eigh rather than the general
#    eig solver: eigh guarantees real eigenvalues and orthonormal
#    eigenvectors, whereas eig can return complex-typed results caused by
#    floating-point round-off.
#    NOTE: the sign of each eigenvector is arbitrary, so a principal axis may
#    appear mirrored compared with other PCA implementations.
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
# 7. Sort eigenvalues and eigenvectors by decreasing eigenvalue
#    (eigenvectors are stored as columns, so reorder the columns)
sorted_indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[sorted_indices]
eigenvectors = eigenvectors[:, sorted_indices]
# 8. Plot explained variance
#    Each eigenvalue is the variance captured along its component; dividing
#    by the total gives the fraction of variance explained.
explained_variance_ratio = eigenvalues / np.sum(eigenvalues)
plt.figure(figsize=(6, 4))
plt.bar(
    range(1, len(explained_variance_ratio) + 1),
    explained_variance_ratio,
    alpha=0.7,
    align='center',
)
plt.xlabel('Principal Components')
plt.ylabel('Explained Variance Ratio')
plt.title('Explained Variance by Components')
plt.show()
# 9. Choose first k eigenvectors (let's take 2 for 2D)
k = 2
eigenvectors_k = eigenvectors[:, :k]
# 10. Transform the original data (project the scaled samples onto the
#     top-k principal axes)
X_pca = X_scaled @ eigenvectors_k
# 11. Plot PCA Result
#     factorize maps each species label to an integer code used for colouring;
#     the unique labels are kept so the colorbar can show names, not codes.
species_codes, species_names = pd.factorize(df['species'])
plt.figure(figsize=(6, 6))
scatter = plt.scatter(X_pca[:, 0], X_pca[:, 1], c=species_codes, cmap='viridis')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('PCA Result (Iris Dataset)')
colorbar = plt.colorbar(scatter, ticks=np.arange(len(species_names)))
colorbar.set_ticklabels([str(name) for name in species_names])
plt.show()
# Comments
# Post a Comment