# Required imports
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
# Step 1: Encode categorical features
data_encoded = data.copy()
label_encoders = {}
for col in data.select_dtypes(include='object').columns:
    label_encoders[col] = LabelEncoder()
    data_encoded[col] = label_encoders[col].fit_transform(data[col])
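# Optional sketch (not part of the original script): print the categories each
# fitted LabelEncoder learned, so the integer codes can be traced back to the
# original labels.
for col, le in label_encoders.items():
    print(col, "->", list(le.classes_))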
# Step 2: Split features and target
X = data_encoded.drop(columns=['y'])
y = data_encoded['y']
# Step 3: Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)
# Step 4: Apply SMOTE
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
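# Optional check (not part of the original script): inspect the class balance
# before and after SMOTE to confirm the oversampling behaved as expected.
from collections import Counter
print("Class counts before SMOTE:", Counter(y_train))
print("Class counts after SMOTE:", Counter(y_train_smote))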
# Step 5: Scale features
scaler = StandardScaler()
X_train_smote = scaler.fit_transform(X_train_smote)
X_test = scaler.transform(X_test)  # transform only; the scaler was fit on the training data
# Step 6: Train and evaluate models
# KNN Classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_smote, y_train_smote)
y_pred_knn = knn.predict(X_test)
print("KNN Classifier:")
print(classification_report(y_test, y_pred_knn))
print(f"Accuracy: {accuracy_score(y_test, y_pred_knn)}")
# Logistic Regression
logreg = LogisticRegression(max_iter=1000, random_state=42)
logreg.fit(X_train_smote, y_train_smote)
y_pred_logreg = logreg.predict(X_test)
print("\nLogistic Regression:")
print(classification_report(y_test, y_pred_logreg))
print(f"Accuracy: {accuracy_score(y_test, y_pred_logreg)}")
# Support Vector Machine
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train_smote, y_train_smote)
y_pred_svm = svm.predict(X_test)
print("\nSupport Vector Machine:")
print(classification_report(y_test, y_pred_svm))
print(f"Accuracy: {accuracy_score(y_test, y_pred_svm)}")
Here's the Python code that compares the accuracy scores of KNN,
Logistic Regression, and SVM, then displays the best accuracy score and
its corresponding algorithm:
# Required imports
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
# Step 1: Encode categorical features
data_encoded = data.copy()
label_encoders = {}
for col in data.select_dtypes(include='object').columns:
    label_encoders[col] = LabelEncoder()
    data_encoded[col] = label_encoders[col].fit_transform(data[col])
# Step 2: Split features and target
X = data_encoded.drop(columns=['y'])
y = data_encoded['y']
# Step 3: Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)
# Step 4: Apply SMOTE
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
# Step 5: Scale features
scaler = StandardScaler()
X_train_smote = scaler.fit_transform(X_train_smote)
X_test = scaler.transform(X_test)
# Step 6: Train and evaluate models
accuracy_scores = {}
# KNN Classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_smote, y_train_smote)
y_pred_knn = knn.predict(X_test)
accuracy_scores['KNN'] = accuracy_score(y_test, y_pred_knn)
# Logistic Regression
logreg = LogisticRegression(max_iter=1000, random_state=42)
logreg.fit(X_train_smote, y_train_smote)
y_pred_logreg = logreg.predict(X_test)
accuracy_scores['Logistic Regression'] = accuracy_score(y_test, y_pred_logreg)
# Support Vector Machine
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train_smote, y_train_smote)
y_pred_svm = svm.predict(X_test)
accuracy_scores['SVM'] = accuracy_score(y_test, y_pred_svm)
# Step 7: Find and display the best accuracy score
best_algorithm = max(accuracy_scores, key=accuracy_scores.get)
best_accuracy = accuracy_scores[best_algorithm]
print("Accuracy Scores for Each Algorithm:")
for algorithm, score in accuracy_scores.items():
print(f"{algorithm}: {score:.4f}")
print(f"\nBest Algorithm: {best_algorithm}")
print(f"Best Accuracy Score: {best_accuracy:.4f}")
Explanation:
1. Accuracy Calculation: Each classifier's accuracy score is calculated with accuracy_score.
2. Storing Results: The scores are stored in a dictionary keyed by algorithm name.
3. Finding the Best Algorithm: The max function, with the dictionary's get method as the key, returns the algorithm with the highest accuracy score.
4. Display: The scores of all algorithms and the best algorithm are printed.
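To see how the max call picks the winner, here is a tiny illustration with made-up scores (the numbers are hypothetical, not results from this dataset):

# Hypothetical scores for illustration only
scores = {'KNN': 0.89, 'Logistic Regression': 0.86, 'SVM': 0.88}
best = max(scores, key=scores.get)   # compares the values, returns the key
print(best, scores[best])            # -> KNN 0.89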
Run this code locally to compare the performance of the classifiers and
identify the best one for your dataset.
There are many other algorithms you can use to classify your dataset. Here are some additional popular classification algorithms, each with Python code that plugs into the pipeline above (each snippet assumes X_train_smote, y_train_smote, X_test, y_test, and the accuracy_scores dictionary already exist):
1. Decision Tree
from sklearn.tree import DecisionTreeClassifier
# Decision Tree Classifier
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train_smote, y_train_smote)
y_pred_dt = dt.predict(X_test)
accuracy_scores['Decision Tree'] = accuracy_score(y_test, y_pred_dt)
2. Random Forest
from sklearn.ensemble import RandomForestClassifier
# Random Forest Classifier
rf = RandomForestClassifier(random_state=42, n_estimators=100)
rf.fit(X_train_smote, y_train_smote)
y_pred_rf = rf.predict(X_test)
accuracy_scores['Random Forest'] = accuracy_score(y_test, y_pred_rf)
3. Gradient Boosting (e.g., XGBoost)
from xgboost import XGBClassifier
# XGBoost Classifier
# use_label_encoder is deprecated in recent XGBoost releases, so it is omitted here
xgb = XGBClassifier(random_state=42, eval_metric='logloss')
xgb.fit(X_train_smote, y_train_smote)
y_pred_xgb = xgb.predict(X_test)
accuracy_scores['XGBoost'] = accuracy_score(y_test, y_pred_xgb)
4. Naive Bayes
from sklearn.naive_bayes import GaussianNB
# Naive Bayes Classifier
nb = GaussianNB()
nb.fit(X_train_smote, y_train_smote)
y_pred_nb = nb.predict(X_test)
accuracy_scores['Naive Bayes'] = accuracy_score(y_test, y_pred_nb)
5. Neural Networks (MLPClassifier)
from sklearn.neural_network import MLPClassifier
# Neural Network Classifier
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42)
mlp.fit(X_train_smote, y_train_smote)
y_pred_mlp = mlp.predict(X_test)
accuracy_scores['Neural Network'] = accuracy_score(y_test, y_pred_mlp)
6. AdaBoost
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost Classifier
ada = AdaBoostClassifier(random_state=42, n_estimators=100)
ada.fit(X_train_smote, y_train_smote)
y_pred_ada = ada.predict(X_test)
accuracy_scores['AdaBoost'] = accuracy_score(y_test, y_pred_ada)
7. LightGBM
from lightgbm import LGBMClassifier
# LightGBM Classifier
lgbm = LGBMClassifier(random_state=42)
lgbm.fit(X_train_smote, y_train_smote)
y_pred_lgbm = lgbm.predict(X_test)
accuracy_scores['LightGBM'] = accuracy_score(y_test, y_pred_lgbm)
Final Comparison of All Algorithms
After adding the algorithms above, update the comparison and best score
display:
# Display all accuracy scores and the best algorithm
print("\nAccuracy Scores for Each Algorithm:")
for algorithm, score in accuracy_scores.items():
print(f"{algorithm}: {score:.4f}")
# Identify the best algorithm
best_algorithm = max(accuracy_scores, key=accuracy_scores.get)
best_accuracy = accuracy_scores[best_algorithm]
print(f"\nBest Algorithm: {best_algorithm}")
print(f"Best Accuracy Score: {best_accuracy:.4f}")
Steps to Use:
1. Copy and paste the desired algorithm's code into your script.
2. Run the script to compare the results.
3. Use the final comparison code to find the best-performing algorithm.
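If you prefer to keep the whole comparison in one place, the following sketch trains several of the classifiers in a single loop and reports the best one. It assumes the preprocessing shown earlier has already produced X_train_smote, y_train_smote, the scaled X_test, and y_test; adjust the models dictionary to include whichever algorithms you want to compare.

# Consolidated sketch: train and score several classifiers in one loop.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
models = {
    'KNN': KNeighborsClassifier(n_neighbors=5),
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'SVM': SVC(kernel='linear', random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
}
accuracy_scores = {}
for name, model in models.items():
    model.fit(X_train_smote, y_train_smote)
    accuracy_scores[name] = accuracy_score(y_test, model.predict(X_test))
best_algorithm = max(accuracy_scores, key=accuracy_scores.get)
print(f"Best Algorithm: {best_algorithm} ({accuracy_scores[best_algorithm]:.4f})")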