"""Demonstrate PCA dimensionality reduction on a synthetic high-dimensional dataset.

Generates a 500-sample, 100-feature classification dataset, reduces it to
2 principal components with scikit-learn's PCA, and reports the variance
retained by each component.
"""

import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Step 1: Generate a High-Dimensional Dataset
# Create a synthetic dataset with 100 features (10 informative, 20 redundant);
# random_state fixes the RNG so the run is reproducible.
X, y = make_classification(
    n_samples=500,
    n_features=100,
    n_informative=10,
    n_redundant=20,
    random_state=42,
)

# Convert the data to a DataFrame for easy manipulation
data = pd.DataFrame(X)
print("Original Data Shape:", data.shape)

# Step 2: Apply PCA for Dimensionality Reduction
# Specify the number of components to retain (e.g., keep 2 components for
# visualization). fit_transform both learns the projection and applies it.
pca = PCA(n_components=2)
reduced_data = pca.fit_transform(data)

# Check the shape of the reduced data
print("Reduced Data Shape:", reduced_data.shape)

# Step 3: Check Explained Variance
# This shows how much variance is retained by the selected components.
explained_variance = pca.explained_variance_ratio_
# NOTE(review): the original source is truncated mid-string here
# ("...principal co..."); the message below is a reconstruction — confirm
# against the full original file.
print("\nExplained Variance by each principal component:", explained_variance)