# Logistic regression on the diabetes CSV: load, split, fit, then build the
# confusion matrix for the held-out rows.
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# 'Outcome' is the target column; every other column is used as a feature.
record = pd.read_csv("/content/diabetes.csv")
y = record['Outcome']
x = record.drop('Outcome', axis=1)

# 25% of the rows are held out; the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=16,
)

# max_iter is raised to 1000 (NOTE(review): presumably to let the solver
# converge on unscaled features -- confirm).
logreg = LogisticRegression(random_state=16, max_iter=1000)
logreg.fit(x_train, y_train)
y_pred = logreg.predict(x_test)

# Bare trailing expression: shows the matrix when run as a notebook cell.
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
cnf_matrix
# Self-contained cell: retrain the diabetes logistic-regression model and draw
# its confusion matrix as an annotated heatmap.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load data and split it into features and the 'Outcome' target.
record = pd.read_csv("/content/diabetes.csv")
y = record['Outcome']
x = record.drop('Outcome', axis=1)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=16
)

# Train the model and predict on the held-out rows.
logreg = LogisticRegression(random_state=16, max_iter=1000)
logreg.fit(x_train, y_train)
y_pred = logreg.predict(x_test)

# Confusion matrix of true labels (y_test) against predictions (y_pred).
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)

# The plot title carries the mean accuracy reported by logreg.score.
test_accuracy = logreg.score(x_test, y_test)
all_sample_title = 'Accuracy Score: {0}'.format(test_accuracy)

plt.figure(figsize=(9, 9))
sns.heatmap(
    cnf_matrix,
    annot=True,
    fmt=".0f",
    linewidths=.5,
    square=True,
    cmap='Blues_r',
)
plt.xlabel('Predicted label')
plt.ylabel('Actual label')
plt.title(all_sample_title, size=15)
plt.show()
output:
array([[116,   9],
       [ 25,  42]])
# Two-layer sigmoid network trained with plain backpropagation on a 4-row
# truth table (the targets are the XOR of the last two input columns).
import numpy as np


# Activation function
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), applied element-wise."""
    return 1 / (1 + np.exp(-x))


# Inputs: the first column is a constant 1 (presumably a bias input); the
# remaining two columns enumerate every 0/1 combination.
X = np.array([[1, 0, 0], [1, 0, 1], [1, 1, 0], [1, 1, 1]])
# Targets as an ndarray so the arithmetic below does not rely on implicit
# list-to-array coercion.
Tj = np.array([[0], [1], [1], [0]])

# Initial random weights (seeded so every run starts from the same point).
np.random.seed(42)
W1 = np.random.randn(3, 4)  # input (3) -> hidden (4)
W2 = np.random.randn(4, 1)  # hidden (4) -> output (1)

lr = 0.1
epoch = 10000

# The loop counter has its own name so the epoch count above is not
# overwritten while training.
for step in range(epoch):
    # Forward pass.
    Ij_hidden = np.dot(X, W1)  # net input of hidden layer
    Oj_hidden = sigmoid(Ij_hidden)  # output of hidden layer
    Ij_output = np.dot(Oj_hidden, W2)  # net input of output layer
    Oj_output = sigmoid(Ij_output)  # final output (y_pred)

    # Error at output layer: sigmoid derivative times (target - output).
    Err_output = Oj_output * (1 - Oj_output) * (Tj - Oj_output)
    # Error at hidden layer: output error propagated back through W2. This is
    # computed before W2 is updated so it uses the weights of the forward pass.
    Err_hidden = Oj_hidden * (1 - Oj_hidden) * np.dot(Err_output, W2.T)

    # Update weights.
    W2 += lr * np.dot(Oj_hidden.T, Err_output)  # wij for W2
    W1 += lr * np.dot(X.T, Err_hidden)  # wij for W1

    # Print the sum-of-squares error occasionally.
    if step % 2000 == 0:
        E = 0.5 * np.sum((Tj - Oj_output) ** 2)
        print(f"Epoch:{step},Error:{E:.4f}")

print("Predicted output")
print(Oj_output)
Epoch:0,Error:0.8641
Epoch:2000,Error:0.2243
Epoch:4000,Error:0.0213
Epoch:6000,Error:0.0089
Epoch:8000,Error:0.0055
Predicted output
[[0.02646161]
[0.95237071]
[0.95594195]
[0.05369205]]
# Spam/ham classifier: clean the SMS dataset, build bag-of-words counts, and
# train a multinomial Naive Bayes model.
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

data = pd.read_csv("spam.csv")
data.head(5)

# Encode the label column as integers: spam -> 1, ham -> 0. Any other value
# becomes NaN and is removed by the dropna below.
data['Category'] = data['Category'].map({'spam': 1, 'ham': 0})

# Check for null values and print the count per column.
print(data.isnull().sum())
# Drop rows containing NaN values.
data.dropna(inplace=True)

# Print the number of duplicate rows, then remove them.
print(data.duplicated().sum())
data.drop_duplicates(inplace=True)

# Turn each message into a vector of token counts.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(data['Message'])
y = data['Category']

# 80/20 split, stratified on the label so both parts keep the same spam/ham
# ratio. The call is wrapped inside the parentheses; a line break directly
# after '=' is a syntax error.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# alpha=1.0 is the additive smoothing parameter of MultinomialNB.
nb_model = MultinomialNB(alpha=1.0)
nb_model.fit(X_train, y_train)

y_pred = nb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
# Evaluation of the spam classifier: labelled confusion matrix, summary
# metrics, and a heatmap. Uses y_test and y_pred produced by the training step.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

# Wrap the raw matrix in a DataFrame so rows and columns carry readable labels.
row_labels = ['Actual Ham (0)', 'Actual Spam (1)']
column_labels = ['Predicted Ham (0)', 'Predicted Spam (1)']
cm = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(cm, index=row_labels, columns=column_labels)
print("Confusion Matrix:\n", cm_df)

# Summary metrics computed from the true and predicted labels.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"\nAccuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Heatmap of the labelled matrix; counts are drawn as integers, no colour bar.
plt.figure(figsize=(6, 5))
sns.heatmap(
    cm_df,
    annot=True,
    fmt='d',
    cmap='Blues',
    cbar=False,
)
plt.title('Confusion Matrix')
plt.ylabel('Actual Label')
plt.xlabel('Predicted Label')
plt.show()
Output:
  Category                                            Message
0      ham  Go until jurong point, crazy.. Available only ...
1      ham                      Ok lar... Joking wif u oni...
2     spam  Free entry in 2 a wkly comp to win FA Cup fina...
3      ham  U dun say so early hor... U c already then say...
4      ham  Nah I don't think he goes to usf, he lives aro...
Category 0
Message 0
dtype: int64
415
Accuracy: 0.9728682170542635
Confusion Matrix:
                  Predicted Ham (0)  Predicted Spam (1)
Actual Ham (0)                 887                  17
Actual Spam (1)                 11                 117
Accuracy: 0.9729
Precision: 0.8731
Recall: 0.9141
F1-Score: 0.8931
# K-Means clustering of the Iris dataset, a petal-length/petal-width scatter
# plot coloured by cluster, and a species-vs-cluster cross-tabulation.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans

df = pd.read_csv("Iris.csv")
df.head()

# Features are every column except the species label.
# NOTE(review): only 'Species' is dropped, so if Iris.csv carries an 'Id'
# column it is clustered on as well -- confirm that is intended, since a
# row identifier is not a measurement.
X = df.drop('Species', axis=1)
X.head()

# Apply K-Means clustering with n_clusters = 3; the seed and n_init are fixed
# so the result is repeatable.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
kmeans.fit(X)
labels = kmeans.labels_  # Get the cluster labels
df['Cluster'] = labels
df.head()

plt.figure(figsize=(8, 6))
scatter = plt.scatter(
    df['PetalLengthCm'],
    df['PetalWidthCm'],
    c=df['Cluster'],
    cmap='viridis',
    marker='o',
)
plt.title('K-Means Clustering on Iris Dataset (Petal Length vs Petal Width)')
plt.xlabel('PetalLengthCm')
plt.ylabel('PetalWidthCm')
plt.colorbar(label='Cluster')
plt.grid(True)

# One empty scatter per cluster serves as a legend handle, coloured with the
# same colormap value the main scatter uses for that cluster. The label is a
# single-line f-string; splitting it across lines is a syntax error.
legend_elements = [
    plt.scatter(
        [],
        [],
        marker='o',
        color=scatter.to_rgba(label),
        label=f'Cluster {label}',
    )
    for label in sorted(df['Cluster'].unique())
]
plt.legend(handles=legend_elements, title='Clusters')
plt.show()

# Count how many rows of each species fall into each cluster.
cross_tab = pd.crosstab(df['Species'], df['Cluster'])
# display() is provided by IPython/Jupyter; it is not defined in a plain script.
display(cross_tab)
Output:
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
0   1            5.1           3.5            1.4           0.2
1   2            4.9           3.0            1.4           0.2
2   3            4.7           3.2            1.3           0.2
3   4            4.6           3.1            1.5           0.2
4   5            5.0           3.6            1.4           0.2
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species  Cluster
0   1            5.1           3.5            1.4           0.2  Iris-setosa        2
1   2            4.9           3.0            1.4           0.2  Iris-setosa        2
2   3            4.7           3.2            1.3           0.2  Iris-setosa        2
3   4            4.6           3.1            1.5           0.2  Iris-setosa        2
4   5            5.0           3.6            1.4           0.2  Iris-setosa        2
Cluster           0   1   2
Species
Iris-setosa       1   0  49
Iris-versicolor  49   1   0
Iris-virginica    0  50   0
# Entropy-based decision tree on the species dataset, with every feature
# one-hot encoded, followed by a plot of the fitted tree.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree

df = pd.read_csv('/content/species_dataset.csv')
df.head()

# 'species' is the label; all remaining columns are the input attributes.
y = df['species']
X = df.drop('species', axis=1)

# One-hot encode the attributes into a dense array and keep the generated
# column names so the tree plot can label its splits.
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
X_encoded = encoder.fit_transform(X)
feature_names = encoder.get_feature_names_out(X.columns)

# criterion='entropy' selects splits by information gain.
model = DecisionTreeClassifier(criterion='entropy', random_state=42)
model.fit(X_encoded, y)

# Drawing options for plot_tree, collected in one place.
tree_plot_options = dict(
    feature_names=feature_names,
    class_names=model.classes_,
    filled=True,
    rounded=True,
    proportion=True,
    fontsize=10,
)
plt.figure(figsize=(15, 10))
plot_tree(model, **tree_plot_options)
plt.title("Decision Tree (ID3-like) for Species Classification")
plt.show()
Output:
       Toothed      Hair  Breathes      Legs  species
0      Toothed      Hair  Breathes      Legs   Mammal
1      Toothed      Hair  Breathes      Legs   Mammal
2      Toothed      Hair  Breathes      Legs   Mammal
3      Toothed  Not Hair  Breathes  Not Legs  Reptile
4  Not Toothed      Hair  Breathes      Legs   Mammal