RANDOM FOREST CLASSIFIER
QUESTION 1:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Generate a synthetic binary classification dataset with 3 informative features
X, y = make_classification(n_samples=1000, n_features=3,
                           n_informative=3, n_redundant=0,
                           n_classes=2, random_state=42)
# Hold out 30% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.3,
                                                    random_state=42)
# Train a Random Forest with 100 trees and evaluate on the test set
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy on Test Set: ", accuracy_score(y_test, y_pred))
OUTPUT: Accuracy on Test Set: 0.9333333333333333
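Accuracy alone can hide per-class behaviour. As an optional follow-up (not part of the original exercise), a minimal sketch that assumes the same synthetic data, split, and model settings as above prints per-class precision and recall with sklearn's classification_report:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
# Recreate the same synthetic data and split as in the exercise above (assumed settings)
X, y = make_classification(n_samples=1000, n_features=3,
                           n_informative=3, n_redundant=0,
                           n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.3,
                                                    random_state=42)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
# Per-class precision, recall and F1 on the held-out test set
print(classification_report(y_test, clf.predict(X_test)))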
QUESTION 2:
from sklearn.datasets import load_iris
import pandas as pd
# Load the Iris dataset
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['species'] = data.target
print(df.head())
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Load the Iris dataset and split it 80/20 into train and test sets
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=42)
# Train a Random Forest with 100 trees
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
# Evaluate the model
print("Accuracy on Test Set: ", accuracy_score(y_test, y_pred))
OUTPUT:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  species
0                5.1               3.5                1.4               0.2        0
1                4.9               3.0                1.4               0.2        0
2                4.7               3.2                1.3               0.2        0
3                4.6               3.1                1.5               0.2        0
4                5.0               3.6                1.4               0.2        0
Accuracy on Test Set: 1.0
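As an optional extra (not part of the original exercise), the fitted forest exposes impurity-based feature importances through its feature_importances_ attribute, which shows which Iris measurements drive the predictions. A minimal sketch, assuming the same Iris split and model settings as above:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# Recreate the same Iris split as in the exercise above (assumed settings)
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target,
                                                    test_size=0.2,
                                                    random_state=42)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
# Impurity-based importance of each feature; the values sum to 1.0
for name, importance in zip(data.feature_names, clf.feature_importances_):
    print(f"{name}: {importance:.3f}")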
QUESTION 3:
from sklearn.datasets import load_breast_cancer
import pandas as pd
# Load the Breast Cancer dataset
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target
print(df.head())
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Load the Breast Cancer dataset and split it 80/20 into train and test sets
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=42)
# Train a Random Forest with 100 trees
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
# Evaluate the model
print("Accuracy on Test Set: ", accuracy_score(y_test, y_pred))
OUTPUT:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness
0        17.99         10.38          122.80     1001.0          0.11840
1        20.57         17.77          132.90     1326.0          0.08474
2        19.69         21.25          130.00     1203.0          0.10960
3        11.42         20.38           77.58      386.1          0.14250
4        20.29         14.34          135.10     1297.0          0.10030

   mean compactness  mean concavity  mean concave points  mean symmetry
0           0.27760          0.3001              0.14710         0.2419
1           0.07864          0.0869              0.07017         0.1812
2           0.15990          0.1974              0.12790         0.2069
3           0.28390          0.2414              0.10520         0.2597
4           0.13280          0.1980              0.10430         0.1809

   mean fractal dimension  ...  worst texture  worst perimeter  worst area
0                 0.07871  ...          17.33           184.60      2019.0
1                 0.05667  ...          23.41           158.80      1956.0
2                 0.05999  ...          25.53           152.50      1709.0
3                 0.09744  ...          26.50            98.87       567.7
4                 0.05883  ...          16.67           152.20      1575.0

   worst smoothness  worst compactness  worst concavity  worst concave points
0            0.1622             0.6656           0.7119                0.2654
1            0.1238             0.1866           0.2416                0.1860
2            0.1444             0.4245           0.4504                0.2430
3            0.2098             0.8663           0.6869                0.2575
4            0.1374             0.2050           0.4000                0.1625

   worst symmetry  worst fractal dimension  target
0          0.4601                  0.11890       0
1          0.2750                  0.08902       0
2          0.3613                  0.08758       0
3          0.6638                  0.17300       0
4          0.2364                  0.07678       0

[5 rows x 31 columns]
Accuracy on Test Set: 0.9649122807017544
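A confusion matrix shows how the remaining errors split between malignant and benign cases. A minimal sketch (an addition, assuming the same Breast Cancer split and model settings as above):
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
# Recreate the same split as in the exercise above (assumed settings)
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target,
                                                    test_size=0.2,
                                                    random_state=42)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
# Rows are true classes (0 = malignant, 1 = benign), columns are predictions
print(confusion_matrix(y_test, clf.predict(X_test)))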
QUESTION 4:
from sklearn.datasets import load_wine
import pandas as pd
# Load the Wine dataset
data = load_wine()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target
print(df.head())
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Load the Wine dataset and split it 80/20 into train and test sets
data = load_wine()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=42)
# Train a Random Forest with 100 trees and evaluate on the test set
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy on Test Set: ", accuracy_score(y_test, y_pred))
OUTPUT:
   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols
0    14.23        1.71  2.43               15.6      127.0           2.80
1    13.20        1.78  2.14               11.2      100.0           2.65
2    13.16        2.36  2.67               18.6      101.0           2.80
3    14.37        1.95  2.50               16.8      113.0           3.85
4    13.24        2.59  2.87               21.0      118.0           2.80

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue
0        3.06                  0.28             2.29             5.64  1.04
1        2.76                  0.26             1.28             4.38  1.05
2        3.24                  0.30             2.81             5.68  1.03
3        3.49                  0.24             2.18             7.80  0.86
4        2.69                  0.39             1.82             4.32  1.04

   od280/od315_of_diluted_wines  proline  target
0                          3.92   1065.0       0
1                          3.40   1050.0       0
2                          3.17   1185.0       0
3                          3.45   1480.0       0
4                          2.93    735.0       0
Accuracy on Test Set: 1.0
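A perfect hold-out score can depend on the particular train/test split. As an optional check (not part of the original exercise), 5-fold cross-validation gives a more robust estimate; a minimal sketch using the same model settings:
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
data = load_wine()
clf = RandomForestClassifier(n_estimators=100, random_state=42)
# 5-fold cross-validated accuracy over the whole Wine dataset
scores = cross_val_score(clf, data.data, data.target, cv=5)
print("Cross-validated accuracy:", scores.mean())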