In [4]: !pip install xgboost
Collecting xgboost
Downloading xgboost-2.1.3-py3-none-win_amd64.whl (124.9 MB)
Requirement already satisfied: scipy in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from xgboost) (1.5.2)
Requirement already satisfied: numpy in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from xgboost) (1.19.2)
Installing collected packages: xgboost
Successfully installed xgboost-2.1.3
XGBOOST
In [5]: import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
import pandas as pd
In [6]: # Example: Load a sample dataset (Iris for classification)
from sklearn.datasets import load_iris
data = load_iris()
X, y = data.data, data.target
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
In [7]: # Define XGBoost Classifier
xgb_classifier = xgb.XGBClassifier(
    objective="multi:softmax",   # Multi-class classification
    eval_metric="mlogloss",      # Evaluation metric
    use_label_encoder=False,     # Ignored in XGBoost >= 2.0 (triggers the warning below)
    max_depth=6,
    learning_rate=0.1,
    n_estimators=100,
    random_state=42
)
# Train the model
xgb_classifier.fit(X_train, y_train)
C:\Users\Vineeta Shrivastava\anaconda3\lib\site-packages\xgboost\core.py:158: UserWarning: [18:27:50] WARNING: C:\buildkite-agent\builds\buildkite-windows-cpu-autoscaling-group-i-0c55ff5f71b100e98-1\xgboost\xgboost-ci-windows\src\learner.cc:740:
Parameters: { "use_label_encoder" } are not used.
  warnings.warn(smsg, UserWarning)
Out[7]: XGBClassifier(base_score=None, booster=None, callbacks=None,
                      colsample_bylevel=None, colsample_bynode=None,
                      colsample_bytree=None, device=None, early_stopping_rounds=None,
                      enable_categorical=False, eval_metric='mlogloss',
                      feature_types=None, gamma=None, grow_policy=None,
                      importance_type=None, interaction_constraints=None,
                      learning_rate=0.1, max_bin=None, max_cat_threshold=None,
                      max_cat_to_onehot=None, max_delta_step=None, max_depth=6,
                      max_leaves=None, min_child_weight=None, missing=nan,
                      monotone_constraints=None, multi_strategy=None, n_estimators=100,
                      n_jobs=None, num_parallel_tree=None, objective='multi:softmax', ...)
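Note that objective="multi:softmax" makes predict return class labels directly. If per-class probabilities are needed, multi:softprob is the usual choice; a minimal sketch (the variable name proba_clf is illustrative, not from the original notebook):

# Sketch only: multi:softprob exposes per-class probabilities via predict_proba
proba_clf = xgb.XGBClassifier(
    objective="multi:softprob",
    eval_metric="mlogloss",
    random_state=42
)
proba_clf.fit(X_train, y_train)
print(proba_clf.predict_proba(X_test[:3]))  # One column per class; rows sum to 1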
In [8]: # Predict on test data
y_pred = xgb_classifier.predict(X_test)
# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
Accuracy: 100.00%
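The imports in In [5] include mean_squared_error, which the classification example never uses. For completeness, a minimal regression sketch with xgb.XGBRegressor, using the diabetes dataset as an illustrative stand-in (not part of the original notebook):

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import xgboost as xgb

# Illustrative regression data; the original notebook only runs the classifier
Xr, yr = load_diabetes(return_X_y=True)
Xr_train, Xr_test, yr_train, yr_test = train_test_split(Xr, yr, test_size=0.2, random_state=42)

xgb_regressor = xgb.XGBRegressor(
    objective="reg:squarederror",  # Standard squared-error regression objective
    max_depth=6,
    learning_rate=0.1,
    n_estimators=100,
    random_state=42
)
xgb_regressor.fit(Xr_train, yr_train)
mse = mean_squared_error(yr_test, xgb_regressor.predict(Xr_test))
print(f"MSE: {mse:.2f}")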
ADA BOOST
In [9]: !pip install scikit-learn
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
import pandas as pd
from sklearn.datasets import load_iris
data = load_iris()
X, y = data.data, data.target
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Define base estimator
base_classifier = DecisionTreeClassifier(max_depth=1)
# Define AdaBoost Classifier
ada_classifier = AdaBoostClassifier(
    base_estimator=base_classifier,  # Renamed to `estimator` in scikit-learn >= 1.2
    n_estimators=50,                 # Number of weak learners
    learning_rate=1.0,               # Shrinkage parameter
    random_state=42
)
# Train the model
ada_classifier.fit(X_train, y_train)
# Predict on test data
y_pred = ada_classifier.predict(X_test)
# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
import matplotlib.pyplot as plt
# Feature importance visualization
if 'ada_classifier' in globals():
    importances = ada_classifier.feature_importances_
elif 'ada_regressor' in globals():
    importances = ada_regressor.feature_importances_
plt.bar(range(len(importances)), importances)
plt.xlabel("Features")
plt.ylabel("Importance")
plt.title("Feature Importance in AdaBoost")
plt.show()
Requirement already satisfied: scikit-learn in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (0.23.2)
Requirement already satisfied: scipy>=0.19.1 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from scikit-learn) (1.5.2)
Requirement already satisfied: numpy>=1.13.3 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from scikit-learn) (1.19.2)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from scikit-learn) (2.1.0)
Requirement already satisfied: joblib>=0.11 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from scikit-learn) (0.17.0)
Accuracy: 100.00%
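The elif branch above checks for an ada_regressor that this notebook never defines, so only the classifier path runs. A minimal sketch of what such a regressor could look like, using the imported AdaBoostRegressor and DecisionTreeRegressor, with the diabetes dataset as an illustrative stand-in:

from sklearn.datasets import load_diabetes
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Illustrative regression data (not part of the original notebook)
Xr, yr = load_diabetes(return_X_y=True)
Xr_train, Xr_test, yr_train, yr_test = train_test_split(Xr, yr, test_size=0.2, random_state=42)

ada_regressor = AdaBoostRegressor(
    base_estimator=DecisionTreeRegressor(max_depth=3),  # `estimator` in scikit-learn >= 1.2
    n_estimators=50,
    learning_rate=1.0,
    random_state=42
)
ada_regressor.fit(Xr_train, yr_train)
mse = mean_squared_error(yr_test, ada_regressor.predict(Xr_test))
print(f"MSE: {mse:.2f}")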
GRADIENT BOOST
In [11]: !pip install scikit-learn
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
import pandas as pd
from sklearn.datasets import load_iris
data = load_iris()
X, y = data.data, data.target
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Define Gradient Boosting Classifier
gb_classifier = GradientBoostingClassifier(
    n_estimators=100,    # Number of boosting stages
    learning_rate=0.1,   # Shrinkage rate
    max_depth=3,         # Depth of individual estimators
    random_state=42
)
# Train the model
gb_classifier.fit(X_train, y_train)
# Predict on test data
y_pred = gb_classifier.predict(X_test)
# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
import matplotlib.pyplot as plt
# Feature importance visualization
if 'gb_classifier' in globals():
    importances = gb_classifier.feature_importances_
elif 'gb_regressor' in globals():
    importances = gb_regressor.feature_importances_
plt.bar(range(len(importances)), importances)
plt.xlabel("Features")
plt.ylabel("Importance")
plt.title("Feature Importance in Gradient Boosting")
plt.show()
Requirement already satisfied: scikit-learn in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (0.23.2)
Requirement already satisfied: scipy>=0.19.1 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from scikit-learn) (1.5.2)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from scikit-learn) (2.1.0)
Requirement already satisfied: numpy>=1.13.3 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from scikit-learn) (1.19.2)
Requirement already satisfied: joblib>=0.11 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from scikit-learn) (0.17.0)
Accuracy: 100.00%
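As with AdaBoost, the elif branch checks for a gb_regressor that is never defined. A minimal sketch using the imported GradientBoostingRegressor, again with the diabetes dataset as an illustrative stand-in:

from sklearn.datasets import load_diabetes
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Illustrative regression data (not part of the original notebook)
Xr, yr = load_diabetes(return_X_y=True)
Xr_train, Xr_test, yr_train, yr_test = train_test_split(Xr, yr, test_size=0.2, random_state=42)

gb_regressor = GradientBoostingRegressor(
    n_estimators=100,    # Number of boosting stages
    learning_rate=0.1,   # Shrinkage rate
    max_depth=3,         # Depth of individual estimators
    random_state=42
)
gb_regressor.fit(Xr_train, yr_train)
mse = mean_squared_error(yr_test, gb_regressor.predict(Xr_test))
print(f"MSE: {mse:.2f}")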
RANDOM FOREST
In [12]: !pip install scikit-learn matplotlib
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
# Load Iris dataset
data = load_iris()
X, y = data.data, data.target
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Define the Random Forest Classifier
rf_classifier = RandomForestClassifier(
    n_estimators=100,    # Number of trees in the forest
    max_depth=None,      # None grows each tree until its leaves are pure
    random_state=42
)
# Train the Random Forest Classifier
rf_classifier.fit(X_train, y_train)
# Predict on test data
y_pred = rf_classifier.predict(X_test)
# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
# Feature Importance Visualization
importances = rf_classifier.feature_importances_
# Plot the feature importance
plt.barh(data.feature_names, importances)
plt.xlabel("Importance")
plt.title("Feature Importance in Random Forest Classifier")
plt.show()
Requirement already satisfied: scikit-learn in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (0.23.2)
Requirement already satisfied: matplotlib in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (3.3.2)
Requirement already satisfied: scipy>=0.19.1 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from scikit-learn) (1.5.2)
Requirement already satisfied: joblib>=0.11 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from scikit-learn) (0.17.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from scikit-learn) (2.1.0)
Requirement already satisfied: numpy>=1.13.3 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from scikit-learn) (1.19.2)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from matplotlib) (1.3.0)
Requirement already satisfied: certifi>=2020.06.20 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from matplotlib) (2020.6.20)
Requirement already satisfied: pillow>=6.2.0 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from matplotlib) (8.0.1)
Requirement already satisfied: cycler>=0.10 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from matplotlib) (0.10.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from matplotlib) (2.4.7)
Requirement already satisfied: python-dateutil>=2.1 in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from matplotlib) (2.8.1)
Requirement already satisfied: six in c:\users\vineeta shrivastava\anaconda3\lib\site-packages (from cycler>=0.10->matplotlib) (1.15.0)
Accuracy: 100.00%
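All four models report 100.00% accuracy, but the 20% hold-out from Iris is only 30 samples, so a perfect score says little about generalization. A quick cross-validation sketch gives a sturdier estimate (the 5-fold choice is illustrative):

from sklearn.model_selection import cross_val_score

# cross_val_score clones the estimator, so the fit above is not reused
scores = cross_val_score(rf_classifier, X, y, cv=5)
print(f"CV accuracy: {scores.mean() * 100:.2f}% (+/- {scores.std() * 100:.2f}%)")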