Experiment-02
AIM: Implement and demonstrate Multiple Linear Regression based on a given set of training
data samples. Read the training data from a .CSV file.
Solution:
Note: Use the provided 25_CompList.csv file.
The Full Code:
# Multiple Linear Regression on the data in 25_CompList.csv.
#
# Pipeline: load the CSV, one-hot encode the categorical feature column,
# drop one dummy column, split 80/20 into train/test sets, fit an ordinary
# least-squares model and report the R^2 score on both splits.

# Importing libraries
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Importing the dataset
data_set = pd.read_csv('25_CompList.csv')

# Extracting independent and dependent variables: every column except the
# last is a feature, the last column is the target. Using -1 for the target
# keeps it consistent with the ":-1" feature slice (identical to index 4 for
# a five-column file).
x = data_set.iloc[:, :-1].values
y = data_set.iloc[:, -1].values

# Categorical data: feature column 3 is label-encoded and then one-hot
# encoded. NOTE(review): assumes column 3 is the only categorical feature --
# confirm against the CSV.
labelencoder_x = LabelEncoder()
x[:, 3] = labelencoder_x.fit_transform(x[:, 3])
columnTransformer = ColumnTransformer(
    [('encoder', OneHotEncoder(), [3])],
    remainder='passthrough',
    # Always return a dense array so the nm.array() conversion below is valid.
    sparse_threshold=0,
)
# The regression needs numeric features, so convert to float (the former
# nm.str alias produced strings and no longer exists in current NumPy).
x = nm.array(columnTransformer.fit_transform(x), dtype=nm.float64)

# Avoiding the dummy variable trap: the one-hot columns come first in the
# transformed matrix, so dropping column 0 removes one redundant dummy.
x = x[:, 1:]

# Splitting the dataset into training and test sets (80% / 20%); the fixed
# random_state makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

# Fitting the MLR model to the training set
regressor = LinearRegression()
regressor.fit(x_train, y_train)

# Predicting the test set result
y_pred = regressor.predict(x_test)

# score() reports the coefficient of determination (R^2) on each split.
print('Train Score: ', regressor.score(x_train, y_train))
print('Test Score: ', regressor.score(x_test, y_test))