"""Train and evaluate a depth-limited decision tree on a tabular CSV dataset.

The script loads the data, holds out 30% of the rows for testing, fits a
``DecisionTreeClassifier`` on the remaining 70%, prints the test-set accuracy,
and writes a plot of the fitted tree to ``dt.png``.
"""

import pandas as pd  # Data loading and manipulation
from matplotlib import pyplot as plt  # Figure creation and display
from sklearn import metrics  # Model evaluation
from sklearn import tree  # Tree plotting helper
from sklearn.model_selection import train_test_split  # Train/test splitting
from sklearn.tree import DecisionTreeClassifier  # The model itself

# Load the dataset and print the first rows as a quick sanity check.
# NOTE(review): the path looks like a mounted Google Drive folder; confirm the
# drive is mounted before running.
data1 = pd.read_csv('/content/drive/My Drive/logisticDS.csv')
print(data1.head())

# Feature columns (independent variables) and the target column (dependent
# variable) that the classifier learns to predict.
fcols = [
    'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
    'BMI', 'DiabetesPedigreeFunction', 'Age',
]
x = data1[fcols]
y = data1.Outcome

# 70% training / 30% testing; the fixed seed makes the split reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.3, random_state=1
)

# Cap the tree at depth 3. The estimator is seeded as well: scikit-learn
# permutes features at every split, so without a seed tied splits can produce
# a different tree (and accuracy) from one run to the next.
classify = DecisionTreeClassifier(max_depth=3, random_state=1)
classify1 = classify.fit(xtrain, ytrain)  # fit() returns the fitted estimator

# Predict on the held-out rows and report the fraction classified correctly.
ypred = classify1.predict(xtest)
print("Accuracy:", metrics.accuracy_score(ytest, ypred))

# Draw the fitted tree. Save before showing: on some backends plt.show()
# tears the figure down, and saving afterwards can yield an empty image.
fig = plt.figure()
tree.plot_tree(classify1)
fig.savefig("dt.png")
plt.show()