In [ ]: # importing modules and packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn import preprocessing
# In [ ]:
# Attach Google Drive to the Colab runtime at a fixed mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)
Mounted at /content/drive
# In [ ]:
# Read the training table from the mounted Drive, then show its first rows
# and its column index.
TRAIN_CSV = '/content/drive/MyDrive/data/train.csv'
df = pd.read_csv(TRAIN_CSV)

preview = df.head()
print(preview)
print(df.columns)
label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 \
0 1 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0
3 4 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0
pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 \
0 0 ... 0 0 0 0 0 0
1 0 ... 0 0 0 0 0 0
2 0 ... 0 0 0 0 0 0
3 0 ... 0 0 0 0 0 0
4 0 ... 0 0 0 0 0 0
pixel780 pixel781 pixel782 pixel783
0 0 0 0 0
1 0 0 0 0
2 0 0 0 0
3 0 0 0 0
4 0 0 0 0
[5 rows x 785 columns]
Index(['label', 'pixel0', 'pixel1', 'pixel2', 'pixel3', 'pixel4', 'pixel5',
'pixel6', 'pixel7', 'pixel8',
...
'pixel774', 'pixel775', 'pixel776', 'pixel777', 'pixel778', 'pixel779',
'pixel780', 'pixel781', 'pixel782', 'pixel783'],
dtype='object', length=785)
# In [ ]:
# Plot the 'pixel0' column against 'label', then report the frame's dimensions.
sns.scatterplot(data=df, x='pixel0', y='label')
print(df.shape)

# Separate the frame into the feature columns (everything except 'label')
# and the target column.
X = df.drop(columns='label')
y = df['label']
(42000, 785)
# In [ ]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
TEST_FRACTION = 0.3
SPLIT_SEED = 101
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED)

# Build the linear regression estimator and fit it on the training portion
# (fit() returns the estimator, so construction and fitting are chained).
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and score the raw predictions.
# NOTE(review): the recorded run of this cell reports an MSE around 1e18,
# which suggests the fit is numerically unstable on these features -- confirm
# that a regression on 'label' is what is intended here.
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
print('mean_squared_error : ', mse)
print('mean_absolute_error : ', mae)
mean_squared_error : 1.1646355758578547e+18
mean_absolute_error : 15509186.729803031
Titanic Predictions
# In [ ]:
# Read the Titanic table and discard the columns that the cells below do not use.
TITANIC_CSV = '/content/drive/MyDrive/data/titanic.csv'
UNUSED_COLUMNS = ['passengerid', 'name', 'fare', 'embarked', 'cabin', 'ticket']
df = pd.read_csv(TITANIC_CSV).drop(columns=UNUSED_COLUMNS)

print(df.head())
print(df.columns)
pclass survived sex age sibsp parch
0 1 1 female 29.0000 0 0
1 1 1 male 0.9167 1 2
2 1 0 female 2.0000 1 2
3 1 0 male 30.0000 1 2
4 1 0 female 25.0000 1 2
Index(['pclass', 'survived', 'sex', 'age', 'sibsp', 'parch'], dtype='object')
# In [ ]:
# Replace every missing value in the frame with 0.
df = df.fillna(value=0)
# In [ ]:
# Encode the 'sex' column numerically: 'male' becomes 0 and 'female' becomes 1.
# Values outside this mapping are left unchanged.
sex_codes = {'male': 0, 'female': 1}
df = df.replace({'sex': sex_codes})
# In [ ]:
# Split the frame into predictor columns and the 'survived' target.
X = df.drop(columns='survived')
y = df['survived']

# Hold out 30% of the rows for evaluation, with a fixed seed for a
# reproducible split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=101)

# Build the linear regression estimator and fit it on the training rows.
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows; `predictions` keeps the raw continuous outputs.
predictions = model.predict(X_test)

# Score against the labels using the predictions rounded to the nearest
# integer (computed once and shared by both metrics).
rounded = np.round(predictions)
print('mean_squared_error : ', mean_squared_error(y_test, rounded))
print('mean_absolute_error : ', mean_absolute_error(y_test, rounded))
mean_squared_error : 0.22137404580152673
mean_absolute_error : 0.22137404580152673
# In [ ]:
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D array-like of counts. Rows are drawn along the y axis
            ("True label") and columns along the x axis ("Predicted label").
        classes: Sequence of tick labels, one per row/column of ``cm``.
        normalize: If true, each row is divided by its sum before anything is
            drawn, so the colours, the colour threshold and the cell
            annotations all describe the same per-row fractions.
        title: Title placed above the plot.
        cmap: Matplotlib colormap used to colour the cells.

    Prints a one-line message stating whether normalization was applied.
    """
    cm = np.asarray(cm)

    # Normalize BEFORE drawing: the image must show the same values that the
    # annotations and the text-colour threshold are computed from.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Rows with no samples stay at 0 instead of becoming NaN (0 / 0).
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is; fractional values get two decimals so
    # the annotations stay readable inside the cells.
    cell_format = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'

    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    # np.ndindex walks every (row, column) pair without needing any import
    # beyond numpy.
    for i, j in np.ndindex(cm.shape):
        plt.text(j, i, format(cm[i, j], cell_format),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
# In [ ]:
# The regression outputs are unbounded, so plain rounding can produce values
# other than 0 and 1 (e.g. -1 or 2). Clip to the two valid classes and store
# them as integers so they compare cleanly with the integer labels.
predictions = np.clip(np.round(predictions), 0, 1).astype(int)
# In [ ]:
from sklearn.metrics import confusion_matrix
import itertools  # kept at module level: other cells may rely on this import

# Pin the label order so the matrix is always 2x2 (row/column 0 = class 0,
# row/column 1 = class 1) and lines up with the two tick labels below, even
# if one class happens to be absent from the test split.
cm = confusion_matrix(y_true=y_test, y_pred=predictions, labels=[0, 1])
cm_plot_labels = ["Met Rose","Met Jack"]
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix')
Confusion matrix, without normalization