import pandas as pd
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from itertools import product
# Load the South African heart-disease dataset ('chd' is the binary target).
file_path = '/mnt/data/SAHeart (2).csv'
df = pd.read_csv(file_path)
df_info = df.info()  # NOTE: info() prints to stdout and returns None
class_distribution = df['chd'].value_counts() if 'chd' in df.columns else None

# Drop the row-identifier column; it carries no predictive signal.
df = df.drop(columns=['row.names'])

X = df.drop(columns=['chd'])
y = df['chd']
# One-hot encode the categorical 'famhist' feature (drop_first avoids collinearity).
X = pd.get_dummies(X, columns=['famhist'], drop_first=True)

# FIX (data leakage): the original upsampled the minority class BEFORE splitting,
# so duplicated minority rows appeared in both train and test sets, inflating
# every downstream test accuracy. Split first, then balance only the training set.
# stratify=y keeps the true class ratio in the held-out test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Balance the training partition by upsampling the minority class (chd == 1).
train = pd.concat([X_train, y_train], axis=1)
train_majority = train[train['chd'] == 0]
train_minority = train[train['chd'] == 1]
train_minority_upsampled = resample(train_minority,
                                    replace=True,                   # sample with replacement
                                    n_samples=len(train_majority),  # match majority count
                                    random_state=42)                # reproducible results
train_balanced = pd.concat([train_majority, train_minority_upsampled])
X_train = train_balanced.drop(columns=['chd'])
y_train = train_balanced['chd']
# Baseline: an MLP with every hyperparameter left at its scikit-learn default.
baseline_model = MLPClassifier(random_state=42)
baseline_model.fit(X_train, y_train)

# Score the baseline on the held-out test set.
y_pred = baseline_model.predict(X_test)
default_accuracy = accuracy_score(y_test, y_pred)
classification_report_default = classification_report(y_test, y_pred)

print("Default Model Accuracy:", default_accuracy)
print("Classification Report:\n", classification_report_default)
# Compare test accuracy across the four activation functions MLPClassifier supports.
activations = ['identity', 'logistic', 'tanh', 'relu']
activation_results = {}
for act_fn in activations:
    # Train a fresh model per activation; everything else held fixed.
    model = MLPClassifier(activation=act_fn, random_state=42, max_iter=500)
    model.fit(X_train, y_train)
    # Record held-out accuracy keyed by activation name.
    activation_results[act_fn] = accuracy_score(y_test, model.predict(X_test))
print("Activation Function Results:", activation_results)
# Sweep initial learning rates 0.10, 0.12, ..., 0.40 with the tanh activation.
learning_rates = [0.1 + 0.02 * i for i in range(16)]
learning_rate_results = {}
for lr in learning_rates:
    model = MLPClassifier(activation='tanh', learning_rate_init=lr,
                          random_state=42, max_iter=500)
    model.fit(X_train, y_train)
    learning_rate_results[lr] = accuracy_score(y_test, model.predict(X_test))

# First-encountered maximum, matching the original strict-'>' tracking.
best_learning_rate = max(learning_rate_results, key=learning_rate_results.get)
best_accuracy = learning_rate_results[best_learning_rate]
print("Best Learning Rate:", best_learning_rate, "with Accuracy:", best_accuracy)
# Grid-search depth (hidden-layer count) x width (neurons per layer), tanh fixed.
neurons_options = [10, 20, 50, 100]
layers_options = [1, 2, 3]
layer_neuron_combinations = list(product(layers_options, neurons_options))

best_layer_neuron_accuracy = 0
best_layer_neuron_config = None
for depth, width in layer_neuron_combinations:
    # e.g. depth=2, width=50 -> hidden_layer_sizes of (50, 50)
    architecture = (width,) * depth
    model = MLPClassifier(activation='tanh', hidden_layer_sizes=architecture,
                          random_state=42, max_iter=500)
    model.fit(X_train, y_train)
    score = accuracy_score(y_test, model.predict(X_test))
    # Keep the first architecture that strictly beats the running best.
    if score > best_layer_neuron_accuracy:
        best_layer_neuron_accuracy = score
        best_layer_neuron_config = architecture
print("Best Layer-Neuron Configuration:", best_layer_neuron_config, "with Accuracy:",
      best_layer_neuron_accuracy)