Ailab 2

The document contains multiple sections demonstrating machine learning techniques in Python: logistic regression for diabetes prediction, a backpropagation neural network trained on XOR, Naive Bayes for spam classification, K-Means clustering on the Iris dataset, and a decision tree for species classification. Each section includes data preprocessing, model training, evaluation metrics, and visualizations such as confusion matrices and scatter plots, illustrating practical applications of machine learning algorithms on real datasets.

import pandas as pd

# Load the diabetes dataset and separate the features from the target
record = pd.read_csv("/content/diabetes.csv")
x = record.drop('Outcome', axis=1)
y = record['Outcome']

# Split into 75% train / 25% test
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=16)

# Train a logistic regression classifier
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(random_state=16, max_iter=1000)
logreg.fit(x_train, y_train)
y_pred = logreg.predict(x_test)

# Confusion matrix on the test set
from sklearn import metrics
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
cnf_matrix
# Repeat the pipeline in a single cell and visualize the confusion matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import pandas as pd

# Load data
record = pd.read_csv("/content/diabetes.csv")

# Prepare data
x = record.drop('Outcome', axis=1)
y = record['Outcome']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=16)

# Train model
logreg = LogisticRegression(random_state=16, max_iter=1000)
logreg.fit(x_train, y_train)
y_pred = logreg.predict(x_test)

# Calculate the confusion matrix
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)

# Plot the confusion matrix as a heatmap with the test accuracy in the title
plt.figure(figsize=(9, 9))
sns.heatmap(cnf_matrix, annot=True, fmt=".0f", linewidths=.5, square=True, cmap='Blues_r')
plt.ylabel('Actual label')
plt.xlabel('Predicted label')
all_sample_title = 'Accuracy Score: {0}'.format(logreg.score(x_test, y_test))
plt.title(all_sample_title, size=15)
plt.show()
Output:

array([[116,   9],
       [ 25,  42]])
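For a fuller picture of per-class performance, here is a minimal follow-up sketch, assuming y_test and y_pred from the cell above are still in scope (the class names 'No Diabetes'/'Diabetes' are labels added here for readability, not from the original dataset):

from sklearn.metrics import classification_report

# Per-class precision, recall, and F1 on the diabetes test split
print(classification_report(y_test, y_pred, target_names=['No Diabetes', 'Diabetes']))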
import numpy as np

# Activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# XOR inputs (the first column is a constant bias input) and targets
X = np.array([[1, 0, 0], [1, 0, 1], [1, 1, 0], [1, 1, 1]])
Tj = np.array([[0], [1], [1], [0]])

# Initial random weights
np.random.seed(42)
W1 = np.random.randn(3, 4)
W2 = np.random.randn(4, 1)
lr = 0.1
epochs = 10000

for epoch in range(epochs):
    Ij_hidden = np.dot(X, W1)          # net input of hidden layer
    Oj_hidden = sigmoid(Ij_hidden)     # output of hidden layer

    Ij_output = np.dot(Oj_hidden, W2)  # net input of output layer
    Oj_output = sigmoid(Ij_output)     # final output (y_pred)

    # Error at output layer (delta rule with the sigmoid derivative O*(1-O))
    Err_output = Oj_output * (1 - Oj_output) * (Tj - Oj_output)

    # Error at hidden layer, backpropagated through W2
    Err_hidden = Oj_hidden * (1 - Oj_hidden) * np.dot(Err_output, W2.T)

    # Update weights
    W2 += lr * np.dot(Oj_hidden.T, Err_output)
    W1 += lr * np.dot(X.T, Err_hidden)

    # Print error occasionally
    if epoch % 2000 == 0:
        E = 0.5 * np.sum((Tj - Oj_output) ** 2)
        print(f"Epoch:{epoch},Error:{E:.4f}")

print("Predicted output")
print(Oj_output)
Output:

Epoch:0,Error:0.8641
Epoch:2000,Error:0.2243
Epoch:4000,Error:0.0213
Epoch:6000,Error:0.0089
Epoch:8000,Error:0.0055
Predicted output
[[0.02646161]
 [0.95237071]
 [0.95594195]
 [0.05369205]]
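To read the network's outputs as class labels, a minimal sketch (assuming Oj_output from the training loop above) that thresholds the sigmoid activations at 0.5:

# Threshold the continuous outputs to recover binary XOR predictions
predictions = (Oj_output > 0.5).astype(int)
print(predictions)  # matches the targets Tj: [[0], [1], [1], [0]]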
import numpy as np
import pandas as pd

data=pd.read_csv("spam.csv")
data.head(5)
data['Category'] = data['Category'].map({'spam': 1, 'ham': 0})
#checks for null values and prints its count
print(data.isnull().sum())
#drops NaN values
data.dropna(inplace=True)
#get count of duplicate values
print(data.duplicated().sum())
data.drop_duplicates(inplace=True)
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(data['Message'])
from sklearn.model_selection import train_test_split
y = data['Category']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
nb_model = MultinomialNB(alpha=1.0)
nb_model.fit(X_train, y_train)
y_pred = nb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

from sklearn.metrics import confusion_matrix
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
cm = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(cm, index=['Actual Ham (0)', 'Actual Spam (1)'],
columns=['Predicted Ham (0)', 'Predicted Spam (1)'])
print("Confusion Matrix:\n", cm_df)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"\nAccuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

plt.figure(figsize=(6, 5))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title('Confusion Matrix')
plt.ylabel('Actual Label')
plt.xlabel('Predicted Label')
plt.show()

Output:

  Category                                            Message
0      ham  Go until jurong point, crazy.. Available only ...
1      ham                      Ok lar... Joking wif u oni...
2     spam  Free entry in 2 a wkly comp to win FA Cup fina...
3      ham  U dun say so early hor... U c already then say...
4      ham  Nah I don't think he goes to usf, he lives aro...

Category    0
Message     0
dtype: int64

415
Accuracy: 0.9728682170542635

Confusion Matrix:
                  Predicted Ham (0)  Predicted Spam (1)
Actual Ham (0)                  887                  17
Actual Spam (1)                  11                 117

Accuracy: 0.9729
Precision: 0.8731
Recall: 0.9141
F1-Score: 0.8931
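As a quick sanity check, a minimal sketch (assuming vectorizer and nb_model from above; the two sample messages are made up for illustration) that classifies unseen text:

samples = ["Congratulations! You have won a free prize, call now",
           "Are we still meeting for lunch tomorrow?"]
sample_vec = vectorizer.transform(samples)
print(nb_model.predict(sample_vec))  # 1 = spam, 0 = ham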
import pandas as pd

df = pd.read_csv("Iris.csv")
df.head()

# Drop the target column; note that the Id column is kept among the features here
X = df.drop('Species', axis=1)
X.head()

from sklearn.cluster import KMeans

# Apply K-Means clustering with n_clusters = 3
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
kmeans.fit(X)
labels = kmeans.labels_  # Get the cluster labels

# Attach the cluster labels to the dataframe
df['Cluster'] = labels
df.head()
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
scatter = plt.scatter(df['PetalLengthCm'], df['PetalWidthCm'], c=df['Cluster'],
                      cmap='viridis', marker='o')
plt.title('K-Means Clustering on Iris Dataset (Petal Length vs Petal Width)')
plt.xlabel('PetalLengthCm')
plt.ylabel('PetalWidthCm')
plt.colorbar(label='Cluster')
plt.grid(True)

# Build a legend with one proxy point per cluster
legend_elements = [plt.scatter([], [], marker='o', color=scatter.to_rgba(label),
                               label=f'Cluster {label}')
                   for label in sorted(df['Cluster'].unique())]
plt.legend(handles=legend_elements, title='Clusters')

plt.show()
cross_tab = pd.crosstab(df['Species'], df['Cluster'])
display(cross_tab)
Output:

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
0   1            5.1           3.5            1.4           0.2
1   2            4.9           3.0            1.4           0.2
2   3            4.7           3.2            1.3           0.2
3   4            4.6           3.1            1.5           0.2
4   5            5.0           3.6            1.4           0.2

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species  Cluster
0   1            5.1           3.5            1.4           0.2  Iris-setosa        2
1   2            4.9           3.0            1.4           0.2  Iris-setosa        2
2   3            4.7           3.2            1.3           0.2  Iris-setosa        2
3   4            4.6           3.1            1.5           0.2  Iris-setosa        2
4   5            5.0           3.6            1.4           0.2  Iris-setosa        2

Cluster           0   1   2
Species
Iris-setosa       1   0  49
Iris-versicolor  49   1   0
Iris-virginica    0  50   0
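To put a single number on how well the clusters track the species, a minimal sketch (assuming df with its Species and Cluster columns from above) using scikit-learn's adjusted Rand index:

from sklearn.metrics import adjusted_rand_score

# 1.0 = clusters match the species labels perfectly, ~0.0 = random assignment
ari = adjusted_rand_score(df['Species'], df['Cluster'])
print(f"Adjusted Rand Index: {ari:.4f}")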
import pandas as pd

df = pd.read_csv('/content/species_dataset.csv')
df.head()

from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt

# One-hot encode the categorical features
X = df.drop('species', axis=1)
y = df['species']
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
X_encoded = encoder.fit_transform(X)
feature_names = encoder.get_feature_names_out(X.columns)

# Train an entropy-based (ID3-like) decision tree and plot it
model = DecisionTreeClassifier(criterion='entropy', random_state=42)
model.fit(X_encoded, y)

plt.figure(figsize=(15, 10))
plot_tree(model,
          feature_names=feature_names,
          class_names=model.classes_,
          filled=True,
          rounded=True,
          proportion=True,
          fontsize=10)
plt.title("Decision Tree (ID3-like) for Species Classification")
plt.show()
Output:

       Toothed      Hair  Breathes      Legs  species
0      Toothed      Hair  Breathes      Legs   Mammal
1      Toothed      Hair  Breathes      Legs   Mammal
2      Toothed      Hair  Breathes      Legs   Mammal
3      Toothed  Not Hair  Breathes  Not Legs  Reptile
4  Not Toothed      Hair  Breathes      Legs   Mammal
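Finally, a minimal sketch (assuming model, encoder, and X from above; the sample animal is hypothetical) that classifies a new row through the same one-hot encoding:

# Hypothetical animal: toothed, no hair, breathes, has legs
new_animal = pd.DataFrame([['Toothed', 'Not Hair', 'Breathes', 'Legs']],
                          columns=X.columns)
print(model.predict(encoder.transform(new_animal)))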
