3/7/24, 14:31 Untitled33.
ipynb - Colab
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the data set into a pandas DataFrame.
# NOTE(review): '[Link]' appears to be a placeholder left by the notebook
# export; the real CSV path or URL is not visible here and must be restored.
df= pd.read_csv('[Link]')
# Bare expression: in a notebook cell this displays the loaded DataFrame.
df
Age Experience Rank Nationality Go
0 36 10 9 UK NO
1 42 12 4 USA NO
2 23 4 6 N NO
3 52 4 4 USA NO
4 43 21 8 USA YES
5 44 14 5 UK NO
6 66 3 7 N YES
7 35 14 9 UK YES
8 52 13 7 N YES
9 35 5 9 N YES
10 24 3 5 USA NO
11 18 3 7 UK YES
12 45 9 9 UK YES
#conversión de texto a valores numéricos
from sklearn.preprocessing import LabelEncoder
def label_encoder(datos_categoria: str) -> None:
    """Replace a categorical column of the global ``df`` with numeric codes.

    A fresh ``LabelEncoder`` is fitted on ``df[datos_categoria]`` and the
    column is overwritten in place with the encoded values.

    Args:
        datos_categoria: Name of the column in ``df`` to encode.

    Returns:
        None. The module-level ``df`` is modified in place.
    """
    # A new encoder per call, so every column gets its own label mapping.
    le = LabelEncoder()
    df[datos_categoria] = le.fit_transform(df[datos_categoria])
# Text-valued columns that must be converted to numeric codes.
variables = ["Nationality", "Go"]
for columna in variables:
    # Encodes the column in place on the global data frame.
    label_encoder(columna)
#verifica que las variables "Nationality" y "Go" se cambiaron a valores numéricos
df
Age Experience Rank Nationality Go
0 36 10 9 1 0
1 42 12 4 2 0
2 23 4 6 0 0
3 52 4 4 2 0
4 43 21 8 2 1
5 44 14 5 1 0
6 66 3 7 0 1
7 35 14 9 1 1
8 52 13 7 0 1
9 35 5 9 0 1
10 24 3 5 2 0
11 18 3 7 1 1
12 45 9 9 1 1
# Split the data frame in two parts: the target column and the predictors.
y = df["Go"]
# Every column except "Go" is kept as a predictor.
x = df.drop("Go", axis=1)
[Link] 1/3
3/7/24, 14:31 [Link] - Colab
# Show the predictor frame. The label names x because x is what gets printed
# (the previous label announced y while printing x).
print("Contenido de x (todas las columnas excepto 'Go'):")
print(x)
Contenido de y (columna 'Go'):
Age Experience Rank Nationality
0 36 10 9 1
1 42 12 4 2
2 23 4 6 0
3 52 4 4 2
4 43 21 8 2
5 44 14 5 1
6 66 3 7 0
7 35 14 9 1
8 52 13 7 0
9 35 5 9 0
10 24 3 5 2
11 18 3 7 1
12 45 9 9 1
# Train the decision tree.
from sklearn.tree import DecisionTreeClassifier

# Classifier created with its default hyperparameters.
arbol = DecisionTreeClassifier()
arbol.fit(x, y)
# Bare expression: in a notebook cell this displays the importance of each
# predictor, one value per column of x.
arbol.feature_importances_
array([0.07738095, 0.1547619 , 0.76785714, 0. ])
from sklearn import tree
# Draw the fitted tree, labelling each split with the predictor column names.
tree.plot_tree(arbol, feature_names=x.columns, rounded=True, filled=True)
[Text(0.2857142857142857, 0.9, 'Rank <= 6.5\ngini = 0.497\nsamples = 13\nvalue = [6,
7]'),
Text(0.14285714285714285, 0.7, 'gini = 0.0\nsamples = 5\nvalue = [5, 0]'),
Text(0.42857142857142855, 0.7, 'Rank <= 8.5\ngini = 0.219\nsamples = 8\nvalue = [1,
7]'),
Text(0.2857142857142857, 0.5, 'gini = 0.0\nsamples = 4\nvalue = [0, 4]'),
Text(0.5714285714285714, 0.5, 'Age <= 35.5\ngini = 0.375\nsamples = 4\nvalue = [1,
3]'),
Text(0.42857142857142855, 0.3, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'),
Text(0.7142857142857143, 0.3, 'Experience <= 9.5\ngini = 0.5\nsamples = 2\nvalue =
[1, 1]'),
Text(0.5714285714285714, 0.1, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'),
Text(0.8571428571428571, 0.1, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]')]
[Link] 2/3
3/7/24, 14:31 [Link] - Colab
[Link] 3/3