# Linear Regression for the Tips Dataset
# importing library
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, cross_val_score
# Simple linear regression on the seaborn "tips" dataset: predict the tip
# amount from the total bill, report fit metrics on a held-out test split,
# and plot the predictions against the actual test data.

# Load dataset
tips = sns.load_dataset('tips')

# Display the first few rows of the dataset.
# print() is used so the output is also shown when run as a plain script,
# where a bare expression would display nothing.
print(tips.head())

# Missing values per column
print(tips.isnull().sum())

# Define the independent variable (X) and dependent variable (y).
# The double brackets keep X two-dimensional (a one-column DataFrame).
X = tips[['total_bill']]
y = tips['tip']

# Split the data into training and testing sets (20% held out for testing;
# the fixed random_state makes the split repeatable between runs).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create a Linear Regression model
model = LinearRegression()

# Fit the model to the training data
model.fit(X_train, y_train)

# Calculate the coefficients
coefficients = model.coef_
intercept = model.intercept_
# There is a single feature, so only the first coefficient is reported.
print("Coefficients:", coefficients[0])
print("Intercept:", intercept)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Calculate the Mean Squared Error and R^2 score
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("R^2 Score:", r2)

# Visualize the linear regression line
plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, color='blue', label='Actual data')
plt.plot(X_test, y_pred, color='red', linewidth=2,
         label='Linear regression line')
plt.xlabel('Total Bill ($)')
plt.ylabel('Tip ($)')
plt.title('Tip Amount Prediction')
plt.legend()
plt.tight_layout()
plt.show()
# Comments