# Performance evaluation measures for regression models
# NOTE: You need to read your CSV file into a DataFrame named `data` before running the code below.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import (mean_absolute_error, mean_squared_error, r2_score,
accuracy_score, precision_score, recall_score, f1_score,
confusion_matrix, roc_curve, auc, RocCurveDisplay)
import matplotlib.pyplot as plt
# --- Feature/target extraction and train/test split ---
# Precondition: `data` must already be in scope (presumably a pandas DataFrame
# loaded from the CSV) and must contain a 'target' column.
# Every column except 'target' is used as a feature.
X = data.drop('target', axis=1)
y = data['target']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
split_settings = {"test_size": 0.3, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_settings)
### Linear Regression ###
# Fit on the training split, then predict the held-out test rows.
linear_model = LinearRegression().fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)

# Performance metrics for Linear Regression (all computed on the test split)
mse_linear = mean_squared_error(y_test, y_pred_linear)
rmse_linear = np.sqrt(mse_linear)  # square root of MSE: same units as the target
mae_linear = mean_absolute_error(y_test, y_pred_linear)
r2_linear = r2_score(y_test, y_pred_linear)

print("Linear Regression Metrics:")
for metric_label, metric_value in (
    ("MAE", mae_linear),
    ("MSE", mse_linear),
    ("RMSE", rmse_linear),
    ("R²", r2_linear),
):
    print(f"{metric_label}: {metric_value}")
### Quadratic Regression (Polynomial Regression) ###
# Expand the features to degree-2 polynomial terms. The transformer is fitted
# on the training split only and then re-used unchanged on the test split.
poly = PolynomialFeatures(degree=2).fit(X_train)
X_train_poly = poly.transform(X_train)
X_test_poly = poly.transform(X_test)

# A plain linear model on the expanded features gives the quadratic fit.
quadratic_model = LinearRegression().fit(X_train_poly, y_train)
y_pred_quad = quadratic_model.predict(X_test_poly)

# Performance metrics for Quadratic Regression (all computed on the test split)
mse_quad = mean_squared_error(y_test, y_pred_quad)
rmse_quad = np.sqrt(mse_quad)  # square root of MSE: same units as the target
mae_quad = mean_absolute_error(y_test, y_pred_quad)
r2_quad = r2_score(y_test, y_pred_quad)

print("\nQuadratic Regression Metrics:")
for metric_label, metric_value in (
    ("MAE", mae_quad),
    ("MSE", mse_quad),
    ("RMSE", rmse_quad),
    ("R²", r2_quad),
):
    print(f"{metric_label}: {metric_value}")
### Logistic Regression ###
# NOTE(review): this fits a classifier on the same `y_train` used by the
# regression models; the metrics below presumably expect a binary target
# (precision/recall/F1 are called with their default settings) - confirm.
logistic_model = LogisticRegression().fit(X_train, y_train)
y_pred_logistic = logistic_model.predict(X_test)

# Performance metrics for Logistic Regression (all computed on the test split)
accuracy, precision, recall, f1 = (
    scorer(y_test, y_pred_logistic)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_test, y_pred_logistic)

print("\nLogistic Regression Metrics:")
for metric_label, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{metric_label}: {metric_value}")
print("Confusion Matrix:")
print(conf_matrix)
### ROC Curve for Logistic Regression ###
# Column 1 of predict_proba is used as the score for each test row
# (treated here as the positive-class probability).
y_pred_proba = logistic_model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)  # area under the (fpr, tpr) curve

# Draw on an explicit figure/axes pair.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])  # small headroom above TPR = 1.0
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc="lower right")
plt.show()
# Comments