0% found this document useful (0 votes)
21 views3 pages

Python Data Analysis for ML

-
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
21 views3 pages

Python Data Analysis for ML

-
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

6/8/24, 6:29 PM Machine Learning.ipynb - Colab

Tugas Python for Data Analisis (Machine Learning) - Ali Al Faruq Rahmatillah - 9882405221121004

from google.colab import drive


# Mount Google Drive at /content/drive so files under MyDrive can be read by path.
drive.mount('/content/drive')

Mounted at /content/drive

import pandas as pd

# Location of the training CSV on the mounted Google Drive.


file_path = '/content/drive/MyDrive/bank-train.csv'

# Read the CSV file into a DataFrame.


data = pd.read_csv(file_path)

# Show the first few rows of the dataset.


print(data.head())

id age job marital education default housing loan \


0 12556 40 blue-collar married basic.9y unknown yes no
1 35451 31 admin. married university.degree no no no
2 30592 59 retired married basic.4y no no no
3 17914 43 housemaid divorced basic.9y no yes no
4 3315 39 admin. single high.school unknown no no

contact month ... campaign pdays previous poutcome emp.var.rate \


0 telephone jul ... 2 999 0 nonexistent 1.4
1 cellular may ... 4 999 0 nonexistent -1.8
2 cellular may ... 6 999 1 failure -1.8
3 cellular jul ... 5 999 0 nonexistent 1.4
4 telephone may ... 2 999 0 nonexistent 1.1

cons.price.idx cons.conf.idx euribor3m nr.employed y


0 93.918 -42.7 4.960 5228.1 0
1 92.893 -46.2 1.244 5099.1 0
2 92.893 -46.2 1.354 5099.1 0
3 93.918 -42.7 4.961 5228.1 0
4 93.994 -36.4 4.860 5191.0 0

[5 rows x 22 columns]

import pandas as pd

# Load the dataset; this rebinds `data` to a DataFrame freshly read from the CSV.
data = pd.read_csv('/content/drive/MyDrive/bank-train.csv')

# Show the first few rows of the dataset.


print(data.head())

id age job marital education default housing loan \


0 12556 40 blue-collar married basic.9y unknown yes no
1 35451 31 admin. married university.degree no no no
2 30592 59 retired married basic.4y no no no
3 17914 43 housemaid divorced basic.9y no yes no
4 3315 39 admin. single high.school unknown no no

contact month ... campaign pdays previous poutcome emp.var.rate \


0 telephone jul ... 2 999 0 nonexistent 1.4
1 cellular may ... 4 999 0 nonexistent -1.8
2 cellular may ... 6 999 1 failure -1.8
3 cellular jul ... 5 999 0 nonexistent 1.4
4 telephone may ... 2 999 0 nonexistent 1.1

cons.price.idx cons.conf.idx euribor3m nr.employed y


0 93.918 -42.7 4.960 5228.1 0
1 92.893 -46.2 1.244 5099.1 0
2 92.893 -46.2 1.354 5099.1 0
3 93.918 -42.7 4.961 5228.1 0
4 93.994 -36.4 4.860 5191.0 0

[5 rows x 22 columns]

https://colab.research.google.com/drive/1ARNPIwE6NvW8hE9pAYsqahZnDOp-5zH_#scrollTo=rf5ikIApm5xG&printMode=true 1/3
6/8/24, 6:29 PM Machine Learning.ipynb - Colab
from sklearn.preprocessing import LabelEncoder, StandardScaler

from sklearn.model_selection import train_test_split

# Drop rows that contain missing values.
data = data.dropna()

# Integer-encode every categorical (object-dtype) column, keeping each fitted
# encoder in a dict keyed by column name so the mapping can be looked up later.
label_encoders = {}
categorical_columns = data.select_dtypes(include=['object']).columns
for column in categorical_columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Separate the features from the target column 'y'.
X = data.drop('y', axis=1)
y = data['y']

# Split BEFORE scaling: fitting the scaler on all rows would let the test
# rows influence the mean/variance used to transform the training data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same transform
# to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any code reading X_scaled still finds the full scaled matrix.
X_scaled = scaler.transform(X)

from sklearn.ensemble import RandomForestClassifier

# Create and train the Random Forest model (random_state fixed at 42).


model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

▾ RandomForestClassifier
RandomForestClassifier(random_state=42)

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Predict on the test set.


y_pred = model.predict(X_test)

# Evaluate the model: confusion matrix, per-class report, and overall accuracy.
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Bare tuple as the cell's last expression so the notebook displays all three results.
conf_matrix, class_report, accuracy

(array([[5641, 207],
[ 360, 382]]),
' precision recall f1-score support\n\n 0 0.94 0.96 0.95 5848\n 1
0.65 0.51 0.57 742\n\n accuracy 0.91 6590\n macro avg 0.79 0.74
0.76 6590\nweighted avg 0.91 0.91 0.91 6590\n',
0.9139605462822459)

import matplotlib.pyplot as plt


import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Predict on the test set.
y_pred = model.predict(X_test)

# Compute the confusion matrix.
conf_matrix = confusion_matrix(y_test, y_pred)

# Render the confusion matrix as an annotated heatmap.
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',  # annotate cells as integers
    cmap='Blues',
    xticklabels=['Predicted No', 'Predicted Yes'],
    # NOTE(review): the original line was cut off after "Actual"; the second
    # label is restored to mirror the x-axis labels -- confirm against the notebook.
    yticklabels=['Actual No', 'Actual Yes'],
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# Build the classification report and accuracy score for display.


class_report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
https://colab.research.google.com/drive/1ARNPIwE6NvW8hE9pAYsqahZnDOp-5zH_#scrollTo=rf5ikIApm5xG&printMode=true 2/3
6/8/24, 6:29 PM Machine Learning.ipynb - Colab
# Print the report text and the accuracy value.
print("Classification Report:\n", class_report)
print("Accuracy Score:", accuracy)

Classification Report:
precision recall f1-score support

0 0.94 0.96 0.95 5848


1 0.65 0.51 0.57 742

accuracy 0.91 6590


macro avg 0.79 0.74 0.76 6590
weighted avg 0.91 0.91 0.91 6590

Accuracy Score: 0.9139605462822459

# Closing message ("Terima kasih!" is Indonesian for "Thank you!").
print("Terima kasih!")

Terima kasih!

https://colab.research.google.com/drive/1ARNPIwE6NvW8hE9pAYsqahZnDOp-5zH_#scrollTo=rf5ikIApm5xG&printMode=true 3/3

You might also like