#Load libraries
import pandas as pd
from sklearn.model_selection import train_test_split
#Import train_test_split function
# Read the dataset from the mounted Drive path and echo it for a quick visual check.
data = pd.read_csv("/content/drive/MyDrive/Jupyter/dataset3.csv")
print(data)

# Features are selected by position: columns 1 through 8 (column 0 is skipped).
X = data.iloc[:, 1:9]
# The target is whatever column comes last in the file.
Y = data.iloc[:, -1]

# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=1,
)

# Show the training portion of the split.
print("The training data for X is \n\n", X_train)
print("The training data for Y is \n\n", y_train)
0 1 6 148 72 35 0 33.6 0.627 50
1 2 1 85 66 29 0 26.6 0.351 31
2 3 8 183 64 0 0 23.3 0.672 32
3 4 1 89 66 23 94 28.1 0.167 21
4 5 0 137 40 35 168 43.1 2.288 33
5 6 5 116 74 0 0 25.6 0.201 30
6 7 3 78 50 32 88 31.0 0.248 26
7 8 10 115 0 0 0 35.3 0.134 29
8 9 2 197 70 45 543 30.5 0.158 53
9 10 8 125 96 0 0 0.0 0.232 54
10 11 4 110 92 0 0 37.6 0.191 30
11 12 10 168 74 0 0 38.0 0.537 34
12 13 10 139 80 0 0 27.1 1.441 57
13 14 1 189 60 23 846 30.1 0.398 59
14 15 5 166 72 19 175 25.8 0.587 51
Label
0 1
1 0
2 1
3 0
4 1
5 0
6 1
7 0
8 1
9 1
10 0
11 1
12 0
13 1
14 1
The training data for X is
Pregnant Glucose Bp Skin Insulin Bmi Pedigree Age
4 0 137 40 35 168 43.1 2.288 33
1 1 85 66 29 0 26.6 0.351 31
13 1 189 60 23 846 30.1 0.398 59
0 6 148 72 35 0 33.6 0.627 50
14 5 166 72 19 175 25.8 0.587 51
9 8 125 96 0 0 0.0 0.232 54
8 2 197 70 45 543 30.5 0.158 53
12 10 139 80 0 0 27.1 1.441 57
11 10 168 74 0 0 38.0 0.537 34
5 5 116 74 0 0 25.6 0.201 30
The training data for Y is
4 1
1 0
13 1
0 1
14 1
9 1
8 1
12 0
11 1
5 0
Name: Label, dtype: int64
# Show the held-out portion of the split: features first, then the target.
print(
    "The TEST data for X is \n\n",
    X_test,
)
print(
    "The TEST data for Y is \n\n",
    y_test,
)
The TEST data for X is
Pregnant Glucose Bp Skin Insulin Bmi Pedigree Age
3 1 89 66 23 94 28.1 0.167 21
7 10 115 0 0 0 35.3 0.134 29
6 3 78 50 32 88 31.0 0.248 26
2 8 183 64 0 0 23.3 0.672 32
10 4 110 92 0 0 37.6 0.191 30
The TEST data for Y is