# Importing necessary libraries
import pandas as pd # For data manipulation and analysis
import numpy as np # For numerical operations
import matplotlib.pyplot as plt # For data visualization
from sklearn.model_selection import train_test_split # For splitting dataset into training and testing
# sets
from sklearn.linear_model import LinearRegression # For performing linear regression
from sklearn import metrics # For evaluating model performance
# Location of the advertising dataset; the CSV must provide 'TV' and 'Sales' columns.
# NOTE(review): this is a machine-specific absolute path - adjust it when running elsewhere.
DATA_PATH = "C:/Users/DELL/OneDrive/Desktop/New folder/newclass/trial/Advertising.csv"
dataset = pd.read_csv(DATA_PATH)

# Double brackets keep the single feature as a one-column DataFrame (2-D),
# which is the shape scikit-learn estimators expect for the feature matrix.
x = dataset[['TV']]  # Independent variable: TV advertising budget
y = dataset['Sales']  # Dependent variable: sales

# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore every number printed below) reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=100)

# Fit a simple (single-feature) linear regression on the training rows only.
slr = LinearRegression()
slr.fit(x_train, y_train)

# Fitted line: Sales = intercept + coefficient * TV
print("Intercept:", slr.intercept_)
print("Coefficient:", slr.coef_)

# Predict sales for the held-out rows.
y_pred_slr = slr.predict(x_test)
print("Prediction of test set:", y_pred_slr)

# Side-by-side table of actual vs. predicted sales for the test rows.
slr_diff = pd.DataFrame({'Actual value': y_test, 'Predicted Value': y_pred_slr})
# A bare `slr_diff.head()` is silently discarded when run as a script,
# so print it explicitly to actually show the first rows.
print(slr_diff.head())
# Visual check of the fit on the held-out rows: actual sales as points,
# model predictions as a red line over the same TV budgets.
axes = plt.gca()  # current Axes (created on demand), same target pyplot draws on
axes.scatter(x_test, y_test, label="Actual Values")
axes.plot(x_test, y_pred_slr, color='red', label="Regression Line")
axes.legend()
plt.show()
# Evaluate the fitted model on the held-out test rows.
# MAE: average of the absolute differences between actual and predicted values.
meanAbErr = metrics.mean_absolute_error(y_test, y_pred_slr)
# MSE: average of the squared differences between actual and predicted values.
meanSqErr = metrics.mean_squared_error(y_test, y_pred_slr)
# RMSE: square root of the MSE computed above (no need to recompute it).
rootMeanSqErr = np.sqrt(meanSqErr)

# R-squared, shown as a percentage with two decimals. It is scored on the
# test split so it is consistent with the error metrics below and is not
# inflated by rows the model was trained on.
print('R squared: {:.2f}'.format(slr.score(x_test, y_test) * 100))
print('Mean Absolute Error:', meanAbErr)
print('Mean squared Error:', meanSqErr)
print('Root Mean Squared Error:', rootMeanSqErr)