# Step 1: Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_wine
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_wine
# Step 2: Load the Wine Dataset
# load_wine() returns a dictionary-like object; its feature matrix and column
# names are wrapped in a DataFrame so later steps can refer to features by name.
wine = load_wine()
data = pd.DataFrame(data=wine["data"], columns=wine["feature_names"])
# Preview the top of the table as a quick sanity check
print("First few rows of the dataset:", data.head(), sep="\n")
# Step 3: Data Preprocessing
# Fit a StandardScaler on the feature table, then apply it, so that every
# feature is on a comparable scale before distances are computed.
scaler = StandardScaler()
scaler.fit(data)
scaled_data = scaler.transform(data)
# Show the first five standardized rows
print("\nStandardized Data (first few rows):", scaled_data[:5], sep="\n")
# Step 4: Apply K-means Clustering
# Setting the number of clusters directly (e.g., k = 3)
k = 3
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' (a single run for k-means++ init), and intermediate releases emit a
# FutureWarning when it is omitted. Fixing it keeps the clustering identical
# across library versions; random_state alone does not guarantee that.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
# Fit on the standardized features and get one cluster label per sample
clusters = kmeans.fit_predict(scaled_data)
# Add cluster labels to the original data
# NOTE: this mutates `data`; if Step 3 is re-run afterwards (e.g. in a
# notebook), the 'Cluster' column would be scaled as if it were a feature.
data['Cluster'] = clusters
# Print Centroid Values
# These centres live in the standardized feature space, not original units.
centroids = kmeans.cluster_centers_
print("\nCentroid values (scaled):")
print(centroids)
# Step 5: Visualize the Clusters
# We will plot only the first two features for simplicity
plt.figure(figsize=(10, 6))
# Keep a handle on the cluster scatter: it is the artist whose colours encode
# the cluster labels, so the colorbar must be built from it.
cluster_points = plt.scatter(
    scaled_data[:, 0], scaled_data[:, 1], c=clusters, cmap='viridis'
)
plt.scatter(centroids[:, 0], centroids[:, 1], s=300, c='red', marker='X', label='Centroids')
plt.title('K-means Clustering on Wine Dataset')
plt.xlabel('Feature 1 (alcohol)')
plt.ylabel('Feature 2 (malic_acid)')
# Pass the mappable explicitly. A bare plt.colorbar() uses the "current"
# mappable, which after the call above is the red centroid scatter (no data
# array), so the bar would not reflect the cluster colouring. Integer ticks
# avoid fractional labels for what are discrete cluster ids.
plt.colorbar(cluster_points, ticks=np.unique(clusters), label='Cluster')
plt.legend()
plt.show()
# Step 6: Analyze Results
# Group the rows of `data` by their assigned cluster label and average every
# remaining column, giving one profile row per cluster.
cluster_analysis = data.groupby(by='Cluster').agg('mean')
print("\nMean values of each feature per cluster:", cluster_analysis, sep="\n")
# Output:
# Mean values of each feature per cluster:
#            alcohol  malic_acid       ash  alcalinity_of_ash   magnesium  \
# Cluster
# 0        12.250923    1.897385  2.231231          20.063077   92.738462
# 1        13.134118    3.307255  2.417647          21.241176   98.666667
# 2        13.676774    1.997903  2.466290          17.462903  107.967742
#
#          total_phenols  flavanoids  nonflavanoid_phenols  proanthocyanins  \
# Cluster
# 0             2.247692    2.050000              0.357692         1.624154
# 1             1.683922    0.818824              0.451961         1.145882
# 2             2.847581    3.003226              0.292097         1.922097
#
#          color_intensity       hue  od280/od315_of_diluted_wines      proline
# Cluster
# 0               2.973077  1.062708                      2.803385   510.169231
# 1               7.234706  0.691961                      1.696667   619.058824
# 2               5.453548  1.065484                      3.163387  1100.225806
#
# Comments