Department of Electronics and Telecommunication Engineering
Ramaiah Institute of Technology
M.S.R. Nagar, Bangalore-54
MACHINE LEARNING (ETE631)
ASSIGNMENT 3
Name: R Jeevan Kumar
USN: 1MS19ET042
Submitted to
Dr. Shobha K R
Associate Professor
Dept. of ETE
2021-2022
All code uploaded to [Link]
1. Develop a regression model that predicts housing prices in Boston using Python and
scikit-learn.
Python code:
# Predict housing prices using Linear Regression
import numpy as np
import pandas as pd


def print_performance(split_name, rmse, r2, file=None):
    """Print the RMSE and R2 score obtained on one data split.

    Args:
        split_name: Name of the split used in the heading, e.g. "training".
        rmse: Root mean squared error measured on that split.
        r2: R2 score (coefficient of determination) measured on that split.
        file: Optional text stream to write to; ``None`` means standard output.
    """
    print(f"Model performance for {split_name} set is :\n ", file=file)
    print("Root Mean Square Error: ", rmse, "\n", file=file)
    print("R2 score is: ", r2, "\n", file=file)


def main():
    """Fit a linear regression on the Boston housing data and report its fit.

    Loads the data set, holds out 30% of the rows for testing, prints RMSE
    and R2 for both splits and shows a scatter plot of true versus predicted
    prices for the test split.
    """
    # scikit-learn and matplotlib are imported here rather than at module
    # level so that print_performance can be imported without them.
    import matplotlib.pyplot as plt
    # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
    # in 1.2, so this import needs scikit-learn < 1.2.
    from sklearn.datasets import load_boston
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_squared_error, r2_score
    from sklearn.model_selection import train_test_split

    print("\n----------- HOUSING PRICE PREDICTOR------------\n")
    boston = load_boston()
    print(boston.feature_names)

    features, target = boston.data, boston.target
    # No random_state is given, so the split (and the scores) vary per run.
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
        features, target, test_size=0.3
    )
    print(Xtrain.shape, Ytrain.shape)
    print(Xtest.shape, Ytest.shape)

    lin_model = LinearRegression()
    lin_model.fit(Xtrain, Ytrain)

    Ytrain_predict = lin_model.predict(Xtrain)
    print_performance(
        "training",
        np.sqrt(mean_squared_error(Ytrain, Ytrain_predict)),
        r2_score(Ytrain, Ytrain_predict),
    )

    Ytest_predict = lin_model.predict(Xtest)
    print_performance(
        "testing",
        np.sqrt(mean_squared_error(Ytest, Ytest_predict)),
        r2_score(Ytest, Ytest_predict),
    )

    # True price on the x axis, model prediction on the y axis.
    plt.scatter(Ytest, Ytest_predict, c='green')
    plt.xlabel("Price in $1000's")
    plt.ylabel("Predicted value")
    plt.title("True value vs predicted value: Linear Regression")
    plt.show()


if __name__ == "__main__":
    main()
Output:
2. Implement data classification on the diabetes data set using k-means clustering.
Python code:
# classify diabetes using K means clustering
import numpy as np
import pandas as pd

# NOTE(review): the CSV file name was lost from the listing ("./[Link]");
# "./diabetes.csv" is an assumption - confirm against the submitted files.
DATA_PATH = "./diabetes.csv"

# Columns in which this script treats a reading of 0 as a missing value.
ZERO_AS_MISSING = ("Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI")

# Features used for clustering and the class-label column. The original
# listing picked them by position (1, 4, 5 and 8); names are used instead so
# the selection does not depend on the column order of the CSV.
FEATURE_COLUMNS = ["Glucose", "Insulin", "BMI"]
LABEL_COLUMN = "Outcome"


def impute_zeros_with_mean(frame, columns=ZERO_AS_MISSING):
    """Return a copy of ``frame`` with zeros in ``columns`` mean-imputed.

    Zeros in the given columns are first turned into NaN and then replaced
    by the mean of the remaining values of the same column. The input frame
    is left untouched.

    Args:
        frame: DataFrame containing at least the columns in ``columns``.
        columns: Names of the columns in which 0 means "missing".

    Returns:
        A new DataFrame; the imputed columns are floating point.

    Raises:
        KeyError: If one of ``columns`` is not present in ``frame``.
    """
    cols = list(columns)
    cleaned = frame.copy()
    with_gaps = cleaned[cols].replace(0, np.nan)
    # Assign the result back instead of calling fillna(inplace=True) on a
    # column selection: the chained in-place form does not reliably update
    # the parent frame in recent pandas versions.
    cleaned[cols] = with_gaps.fillna(with_gaps.mean())
    return cleaned


def align_binary_clusters(train_clusters, train_labels, clusters):
    """Relabel 0/1 cluster ids so they line up with 0/1 class labels.

    K-means numbers its clusters arbitrarily, so cluster 0 may correspond to
    class 1. If the training cluster ids agree with the training labels on
    fewer than half of the samples, the ids are swapped.

    Args:
        train_clusters: Cluster ids (0 or 1) assigned to the training samples.
        train_labels: True class labels (0 or 1) of the training samples.
        clusters: Cluster ids (0 or 1) to relabel, e.g. test predictions.

    Returns:
        A NumPy array with the (possibly swapped) ids of ``clusters``.
    """
    train_clusters = np.asarray(train_clusters)
    train_labels = np.asarray(train_labels)
    clusters = np.asarray(clusters)
    if np.mean(train_clusters == train_labels) < 0.5:
        return 1 - clusters
    return clusters


def main():
    """Cluster the diabetes data with K-means and score it as a classifier.

    Reads the CSV, imputes zero readings, scales all columns to [0, 1],
    clusters the training split on Glucose/Insulin/BMI, prints a
    classification report for the test split and draws a confusion-matrix
    heatmap plus a Glucose-vs-BMI scatter plot of a fit on all samples.
    """
    # The ML / plotting stack is imported here rather than at module level so
    # that the data-preparation helpers can be imported without it.
    import matplotlib.pyplot as plt
    import seaborn as sns
    import sklearn
    from sklearn.cluster import KMeans
    from sklearn.metrics import confusion_matrix
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import MinMaxScaler
    # Retained from the original listing although currently unused.
    from sklearn import datasets, preprocessing  # noqa: F401
    from sklearn.preprocessing import StandardScaler  # noqa: F401

    print("\n ---------- K MEANS CLUSTERING ON DIABETES DATA---------------- \n")

    data = pd.read_csv(DATA_PATH)  # importing files using pandas
    # Replacing zeros (treated as missing) with column mean values
    dataset_new = impute_zeros_with_mean(data)

    # Feature scaling using MinMaxScaler
    sc = MinMaxScaler(feature_range=(0, 1))
    data1 = pd.DataFrame(
        sc.fit_transform(dataset_new), columns=dataset_new.columns
    )

    # Selecting features - [Glucose, Insulin, BMI]
    X = data1[FEATURE_COLUMNS].values
    Y = data1[LABEL_COLUMN].values
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.20, random_state=42,
        stratify=dataset_new[LABEL_COLUMN],
    )

    # Checking dimensions
    print("X_train shape:", X_train.shape)
    print("X_test shape:", X_test.shape)
    print("Y_train shape:", Y_train.shape)
    print("Y_test shape:", Y_test.shape)

    # n_init is set explicitly because its default changed between
    # scikit-learn releases.
    kmeans = KMeans(n_clusters=2, n_init=10, random_state=0)
    kmeans.fit(X_train)
    print(kmeans.cluster_centers_)

    # prediction using kmeans and accuracy; cluster ids are aligned with the
    # class labels first, otherwise the report may be inverted.
    kpred = align_binary_clusters(
        kmeans.labels_, Y_train, kmeans.predict(X_test)
    )
    print('Classification report:\n\n',
          sklearn.metrics.classification_report(Y_test, kpred))

    outcome_labels = sorted(data[LABEL_COLUMN].unique())
    plt.figure()
    sns.heatmap(
        confusion_matrix(Y_test, kpred),
        annot=True,
        xticklabels=outcome_labels,
        yticklabels=outcome_labels,
    )

    # Fit again on all samples and plot Glucose against BMI on its own
    # figure so it does not draw over the heatmap.
    kmeans_all = KMeans(n_clusters=2, n_init=10, random_state=0)
    kmeans_all.fit(X)
    plt.figure()
    plt.scatter(data1["Glucose"].values, data1["BMI"].values,
                c=kmeans_all.labels_, cmap='rainbow')
    plt.show()


if __name__ == "__main__":
    main()
Output: