6/8/24, 6:29 PM Machine Learning.ipynb - Colab
Tugas Python for Data Analisis (Machine Learning) - Ali Al Faruq Rahmatillah - 9882405221121004
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be read by path.
drive.mount('/content/drive')
Mounted at /content/drive
import pandas as pd
# Location of the training CSV inside the mounted Google Drive.
file_path = '/content/drive/MyDrive/bank-train.csv'
# Read the CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer=file_path)
# Preview the first five rows of the dataset.
print(data.head(n=5))
id age job marital education default housing loan \
0 12556 40 blue-collar married basic.9y unknown yes no
1 35451 31 admin. married university.degree no no no
2 30592 59 retired married basic.4y no no no
3 17914 43 housemaid divorced basic.9y no yes no
4 3315 39 admin. single high.school unknown no no
contact month ... campaign pdays previous poutcome emp.var.rate \
0 telephone jul ... 2 999 0 nonexistent 1.4
1 cellular may ... 4 999 0 nonexistent -1.8
2 cellular may ... 6 999 1 failure -1.8
3 cellular jul ... 5 999 0 nonexistent 1.4
4 telephone may ... 2 999 0 nonexistent 1.1
cons.price.idx cons.conf.idx euribor3m nr.employed y
0 93.918 -42.7 4.960 5228.1 0
1 92.893 -46.2 1.244 5099.1 0
2 92.893 -46.2 1.354 5099.1 0
3 93.918 -42.7 4.961 5228.1 0
4 93.994 -36.4 4.860 5191.0 0
[5 rows x 22 columns]
import pandas as pd
# Load the dataset from the mounted Google Drive.
data = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/bank-train.csv')
# Preview the first five rows of the dataset.
print(data.head(n=5))
id age job marital education default housing loan \
0 12556 40 blue-collar married basic.9y unknown yes no
1 35451 31 admin. married university.degree no no no
2 30592 59 retired married basic.4y no no no
3 17914 43 housemaid divorced basic.9y no yes no
4 3315 39 admin. single high.school unknown no no
contact month ... campaign pdays previous poutcome emp.var.rate \
0 telephone jul ... 2 999 0 nonexistent 1.4
1 cellular may ... 4 999 0 nonexistent -1.8
2 cellular may ... 6 999 1 failure -1.8
3 cellular jul ... 5 999 0 nonexistent 1.4
4 telephone may ... 2 999 0 nonexistent 1.1
cons.price.idx cons.conf.idx euribor3m nr.employed y
0 93.918 -42.7 4.960 5228.1 0
1 92.893 -46.2 1.244 5099.1 0
2 92.893 -46.2 1.354 5099.1 0
3 93.918 -42.7 4.961 5228.1 0
4 93.994 -36.4 4.860 5191.0 0
[5 rows x 22 columns]
https://colab.research.google.com/drive/1ARNPIwE6NvW8hE9pAYsqahZnDOp-5zH_#scrollTo=rf5ikIApm5xG&printMode=true 1/3
6/8/24, 6:29 PM Machine Learning.ipynb - Colab
from sklearn.preprocessing import LabelEncoder, StandardScaler
# Drop rows that contain missing values.
data = data.dropna()
# Encode every categorical (object-dtype) column as integer codes, keeping
# the fitted encoder for each column in label_encoders.
label_encoders = {}
categorical_columns = data.select_dtypes(include=['object']).columns
for column in categorical_columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le
# Separate the features from the target column 'y'.
# NOTE(review): only 'y' is dropped, so the 'id' column shown in the preview
# stays in X as a feature -- confirm that is intended.
X = data.drop('y', axis=1)
y = data['y']
# Standardize the features.
# NOTE(review): the scaler is fit on the full dataset; if a held-out split is
# made afterwards, its rows have already influenced the scaling statistics.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)
from sklearn.ensemble import RandomForestClassifier
# Create a Random Forest classifier with a fixed seed and fit it on the
# training split. The bare fit() call is the cell's last expression, so the
# notebook displays the fitted estimator.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
▾ RandomForestClassifier
RandomForestClassifier(random_state=42)
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
# Predict labels for the test set.
y_pred = model.predict(X_test)
# Score the predictions: overall accuracy, per-class report, confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
# Bare tuple as the last expression so the notebook displays all three results.
conf_matrix, class_report, accuracy
(array([[5641, 207],
[ 360, 382]]),
' precision recall f1-score support\n\n 0 0.94 0.96 0.95 5848\n 1
0.65 0.51 0.57 742\n\n accuracy 0.91 6590\n macro avg 0.79 0.74
0.76 6590\nweighted avg 0.91 0.91 0.91 6590\n',
0.9139605462822459)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
# Predict labels for the test set.
y_pred = model.predict(X_test)
# Compute the confusion matrix.
conf_matrix = confusion_matrix(y_test, y_pred)
# Render the confusion matrix as an annotated heatmap.
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['Predicted No', 'Predicted Yes'],
    # The end of this call was cut off in the exported source line;
    # 'Actual Yes' is reconstructed to mirror the x-axis labels.
    yticklabels=['Actual No', 'Actual Yes'],
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()
# Compute the classification report and the accuracy score.
class_report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
# (The cell continues after the page break; the line clipped here is the print statement shown in full below.)
https://colab.research.google.com/drive/1ARNPIwE6NvW8hE9pAYsqahZnDOp-5zH_#scrollTo=rf5ikIApm5xG&printMode=true 2/3
6/8/24, 6:29 PM Machine Learning.ipynb - Colab
# Print the text classification report and the accuracy score.
print("Classification Report:\n", class_report)
print("Accuracy Score:", accuracy)
Classification Report:
precision recall f1-score support
0 0.94 0.96 0.95 5848
1 0.65 0.51 0.57 742
accuracy 0.91 6590
macro avg 0.79 0.74 0.76 6590
weighted avg 0.91 0.91 0.91 6590
Accuracy Score: 0.9139605462822459
# Closing message ("Thank you!" in Indonesian).
print("Terima kasih!")
Terima kasih!
https://colab.research.google.com/drive/1ARNPIwE6NvW8hE9pAYsqahZnDOp-5zH_#scrollTo=rf5ikIApm5xG&printMode=true 3/3