Linear Regression for the Tips Dataset

# Import libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, cross_val_score

# Load seaborn's example "tips" dataset as a DataFrame
tips = sns.load_dataset('tips')

# Display the first few rows of the dataset.
# print() is required: a bare expression is only echoed in an interactive
# session / last line of a notebook cell, and is silently discarded in a script.
print(tips.head())

# Count missing values per column (printed for the same reason as above)
print(tips.isnull().sum())

# Define the independent variable (X) and dependent variable (y)
X = tips[['total_bill']]  # list selector keeps X as a 2-D DataFrame for the estimator
y = tips['tip']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the Linear Regression estimator and fit it on the training partition
# (fit() returns the estimator itself, so the two steps can be chained)
model = LinearRegression().fit(X_train, y_train)

# Read back the fitted parameters
intercept = model.intercept_
coefficients = model.coef_

# Only one feature was used, so the first coefficient is the slope
print("Coefficients:", coefficients[0])
print("Intercept:", intercept)


# Predict tips for the held-out test rows
y_pred = model.predict(X_test)

# Score the predictions against the true test values
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

print("Mean Squared Error:", mse)
print("R^2 Score:", r2)

# Plot the test observations together with the fitted regression line
fig, ax = plt.subplots(figsize=(10, 6))

# Actual test points
ax.scatter(X_test, y_test, color='blue', label='Actual data')

# Model predictions for the same inputs, drawn as a line
ax.plot(X_test, y_pred, color='red', linewidth=2, label='Linear regression line')

# Axis labels, title and legend
ax.set_xlabel('Total Bill ($)')
ax.set_ylabel('Tip ($)')
ax.set_title('Tip Amount Prediction')
ax.legend()

fig.tight_layout()
plt.show()

Comments

Popular posts from this blog

About me

A set of documents that need to be classified, use the Naive Bayesian Classifier

Keras