Experiment: Data-Preprocessing
Dataset: The dataset Data.csv is provided
#Data Preprocessing
#Importing the Libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
#Importing the Dataset
dataset = pd.read_csv('Data.csv')
array=dataset.values
X=array[:,0:3]
Y=array[:,-1]
#Handle missing data
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan,
strategy="mean")
X[:,1:3] = imputer.fit_transform(X[:,1:3])
#Encoding Categorical Data
from sklearn.preprocessing import LabelEncoder,
OneHotEncoder
labelencoder = LabelEncoder()
X[:,0]=labelencoder.fit_transform(X[:,0])
onehotencoder = OneHotEncoder(categorical_features
=[0])
X = onehotencoder.fit_transform(X).toarray()
labelencoder_Y = LabelEncoder()
Y=labelencoder.fit_transform(Y)
#Splitting the dataset into Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test =
train_test_split(X,Y,test_size=0.2, random_state=0)
#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)