Program 10
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
data = load_breast_cancer()
X = data.data
Y = data.target
scaler = StandardScaler()
X_scaler = scaler.fit_transform(X)
kmeans = KMeans(n_clusters=2,random_state=42)
y_kmeans = kmeans.fit_predict(X_scaler)
print("Accuracy SCore: ")
print(accuracy_score(Y,y_kmeans))
print("Classifica on Report...")
print(classifica on_report(Y,y_kmeans))
print("Confusion Mateisx...")
print(confusion_matrix(Y,y_kmeans))
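# Note: k-means assigns cluster IDs arbitrarily, so cluster 0 may correspond
# to class 1 and the raw accuracy above can understate the agreement. A
# minimal two-cluster alignment (a sketch, not part of the original program)
# flips the labels whenever raw agreement falls below 0.5:
y_aligned = y_kmeans if accuracy_score(Y, y_kmeans) >= 0.5 else 1 - y_kmeans
print("Aligned Accuracy Score:")
print(accuracy_score(Y, y_aligned))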
pca = PCA(n_components=2)
x_pca = pca.fit_transform(X_scaler)
df = pd.DataFrame(x_pca,columns=['PC1','PC2'])
df['cluster']= y_kmeans
df['True Labels'] = Y
plt.figure(figsize=(8,6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='cluster', s=100,
                palette='Set1', alpha=0.7, edgecolor='black')
plt.title("K-Means Clustering")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.show()
plt.figure(figsize=(8,6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='True Labels', s=100,
                palette='coolwarm', alpha=0.7, edgecolor='black')
plt.title("True Labels")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.show()
plt.figure(figsize=(8,6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='cluster', s=100,
                palette='Set1', alpha=0.9, edgecolor='black')
centers = pca.transform(kmeans.cluster_centers_)
plt.scatter(x=centers[:, 0], y=centers[:, 1], color='red', marker='X',
            s=200, label='centroids')
plt.title("K-Means Clustering with Centroids")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.legend(title='Cluster')
plt.show()
Program 9
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
data = fetch_olivetti_faces(shuffle=True, random_state=42)
X = data.data
Y = data.target
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.3,random_state=42)
gnb = GaussianNB()
gnb.fit(X_train,Y_train)
Y_pred = gnb.predict(X_test)
print("Accuracy:", accuracy_score(Y_test, Y_pred))
print("Confusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))
print("Classification Report:")
print(classification_report(Y_test, Y_pred))
cross_val = cross_val_score(gnb, X, Y, cv=5, scoring='accuracy')
print("Cross-validation accuracy:", cross_val.mean())
fig, axes = plt.subplots(3, 5, figsize=(12, 6))
# Show test images alongside their true and predicted labels.
for ax, image, prediction, true_label in zip(axes.ravel(), X_test, Y_pred, Y_test):
    ax.imshow(image.reshape(64, 64), cmap=plt.cm.gray)
    ax.set_title(f"True: {true_label}, Pred: {prediction}")
    ax.axis('off')
plt.show()
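GaussianNB treats each of the 4096 raw pixels as an independent feature, which is a poor fit for highly correlated image data. A common variant (a sketch, not part of the original program; the component count is illustrative) compresses the pixels with PCA first:

from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline

# Project the 4096 pixels onto 100 principal components before the NB step.
pca_gnb = make_pipeline(PCA(n_components=100, whiten=True, random_state=42),
                        GaussianNB())
pca_gnb.fit(X_train, Y_train)
print("PCA + GaussianNB accuracy:",
      accuracy_score(Y_test, pca_gnb.predict(X_test)))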
Program 8
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
data = load_breast_cancer()
X = data.data
Y = data.target
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)
clf = DecisionTreeClassifier()
clf.fit(X_train,Y_train)
Y_pred = clf.predict(X_test)
print(accuracy_score(Y_test,Y_pred) * 100)
new_sample = np.array([X_test[10]])
predict = clf.predict(new_sample)
# In this dataset, target 1 is benign and target 0 is malignant.
if predict[0] == 1:
    prediction_class = 'Benign'
else:
    prediction_class = 'Malignant'
print("The predicted class is:", prediction_class)
plt.figure(figsize=(30,50))
tree.plot_tree(clf, feature_names=data.feature_names,
               class_names=data.target_names, filled=True)
plt.show()
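An unconstrained tree typically fits the training set perfectly and can overfit. A quick check (a sketch, not part of the original program; the depth values are illustrative) compares test accuracy across a few max_depth settings:

for depth in [2, 3, 5, None]:
    clf_d = DecisionTreeClassifier(max_depth=depth, random_state=42)
    clf_d.fit(X_train, Y_train)
    acc = accuracy_score(Y_test, clf_d.predict(X_test))
    print(f"max_depth={depth}: test accuracy = {acc:.3f}")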
Program 7
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
def linear_reg():
    data = fetch_california_housing(as_frame=True)
    X = data.data[['AveRooms']]
    Y = data.target
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
    model = LinearRegression()
    model.fit(X_train, Y_train)
    Y_predict = model.predict(X_test)
    print("Mean Squared Error:")
    print(mean_squared_error(Y_test, Y_predict))
    print("R2 Score:")
    print(r2_score(Y_test, Y_predict))
    plt.figure(figsize=(8, 6))
    plt.scatter(X_test, Y_test, color='blue')
    plt.plot(X_test, Y_predict, color='red')
    plt.show()
def poly():
    url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/'
           'auto-mpg/auto-mpg.data')
    names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
             'acceleration', 'model', 'origin', 'car name']
    data = pd.read_csv(url, sep=r'\s+', names=names, na_values='?')
    data = data.dropna()
    X = data['displacement'].values.reshape(-1, 1)
    Y = data['mpg'].values
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
    model = make_pipeline(PolynomialFeatures(degree=2), StandardScaler(), LinearRegression())
    model.fit(X_train, Y_train)
    Y_predict = model.predict(X_test)
    print("Mean Squared Error:")
    print(mean_squared_error(Y_test, Y_predict))
    print("R2 Score:")
    print(r2_score(Y_test, Y_predict))
    plt.figure(figsize=(8, 6))
    plt.scatter(X_test, Y_test, color='blue')
    # Sort by X so the fitted curve plots as a smooth line, not a zigzag.
    order = X_test[:, 0].argsort()
    plt.plot(X_test[order], Y_predict[order], color='red')
    plt.show()
if __name__ == '__main__':
    linear_reg()
    poly()
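To see how the degree choice affects fit quality, the same pipeline can be re-fit for several degrees. A minimal helper (a sketch, not part of the original program; call it from inside poly() with its train/test split):

def compare_degrees(X_train, X_test, Y_train, Y_test):
    # Report test R2 for a few polynomial degrees on the same split.
    for degree in [1, 2, 3, 4]:
        m = make_pipeline(PolynomialFeatures(degree=degree),
                          StandardScaler(), LinearRegression())
        m.fit(X_train, Y_train)
        print(f"degree={degree}: R2 = {r2_score(Y_test, m.predict(X_test)):.3f}")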
Program 6
import numpy as np
import matplotlib.pyplot as plt
def gaussian_kernel(x, xi, tau):
    # Weight falls off with squared distance from the query point x.
    return np.exp(-np.sum((x - xi)**2) / (2 * tau**2))

def local_weight_reg(x, X, y, tau):
    m = X.shape[0]
    weights = np.array([gaussian_kernel(x, X[i], tau) for i in range(m)])
    W = np.diag(weights)
    X_trans_W = X.T @ W
    # Weighted normal equation: theta = (X^T W X)^-1 X^T W y
    theta = np.linalg.inv(X_trans_W @ X) @ X_trans_W @ y
    return x @ theta
np.random.seed(42)
X = np.linspace(0,2*np.pi,100)
y = np.sin(X) + 0.1 *(np.random.randn(100))
X_bias = np.c_[np.ones(X.shape),X]
X_test = np.linspace(0,2*np.pi,200)
X_bias_test = np.c_[np.ones(X_test.shape),X_test]
tau = 0.5
y_pred = np.array([local_weight_reg(x, X_bias, y, tau) for x in X_bias_test])
plt.figure(figsize=(8,6))
plt.scatter(X, y, color='red', label='train')
plt.plot(X_test, y_pred, color='blue', label='LWR fit')
plt.grid(alpha=0.2)
plt.legend()
plt.show()
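The bandwidth tau controls the bias-variance trade-off: small values chase the noise, large values flatten the fit toward ordinary least squares. A quick comparison (a sketch reusing the functions above; the tau values are illustrative):

plt.figure(figsize=(8, 6))
plt.scatter(X, y, color='red', alpha=0.4, label='train')
for t in [0.1, 0.5, 2.0]:
    preds = np.array([local_weight_reg(x, X_bias, y, t) for x in X_bias_test])
    plt.plot(X_test, preds, label=f'tau={t}')
plt.legend()
plt.show()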
Program 5
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
data = np.random.rand(100)
labels = ['class1' if x<=0.5 else 'class2' for x in data[:50] ]
train_data = data[:50].reshape(-1,1)
train_labels = labels
test_data = data[50:].reshape(-1,1)
k_values = [1,2,3,4,5,20,30]
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(train_data, train_labels)
    pred = knn.predict(test_data)
    for i, pr in enumerate(pred, start=51):
        print(f"Point x{i} = {test_data[i-51][0]:.2f} is class {pr}")
    class1 = [test_data[i][0] for i in range(len(pred)) if pred[i] == 'class1']
    class2 = [test_data[i][0] for i in range(len(pred)) if pred[i] == 'class2']
    plt.figure(figsize=(8, 6))
    plt.scatter(train_data, [0]*len(train_data),
                c=['blue' if label == 'class1' else 'red' for label in train_labels],
                marker='o', label='training')
    plt.scatter(class1, [1]*len(class1), c='blue', marker='X', label='class1 (test)')
    plt.scatter(class2, [1]*len(class2), c='red', marker='X', label='class2 (test)')
    plt.title(f'k={k}')
    plt.legend()
    plt.show()
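Since the labelling rule (x <= 0.5 is class1) is known, the test half can also be scored directly (a sketch, not part of the original program):

# True labels for the test half follow the same rule used for training.
true_test = np.array(['class1' if x <= 0.5 else 'class2' for x in data[50:]])
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(train_data, train_labels)
    acc = np.mean(knn.predict(test_data) == true_test)
    print(f"k={k}: accuracy = {acc:.2f}")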
Program 4
import pandas as pd
def find_s_algo(filepath):
    df = pd.read_csv(filepath)
    print("Training Data")
    print(df)
    attributes = df.columns[:-1]
    class_label = df.columns[-1]
    hypothesis = None  # most specific hypothesis until a positive example is seen
    for index, row in df.iterrows():
        if row[class_label] == 'Yes':
            if hypothesis is None:
                # First positive example: adopt it wholesale.
                hypothesis = list(row[:-1])
            else:
                # Generalize each attribute that disagrees with the example.
                for i in range(len(hypothesis)):
                    if hypothesis[i] != row.iloc[i]:
                        hypothesis[i] = '?'
            print(f'Step {index} Hypothesis: {hypothesis}')
    return hypothesis

filepath = r"C:\Users\priya\Downloads\enjoysport_training_data.csv"
a = find_s_algo(filepath)
print("Final")
print(a)
Training Data
     Sky  Temp Humidity    Wind Water Forecast EnjoySport
0  Sunny  Warm   Normal  Strong  Warm     Same        Yes
1  Sunny  Warm     High  Strong  Warm     Same        Yes
2  Rainy  Cold     High  Strong  Warm   Change         No
3  Sunny  Warm     High  Strong  Cool   Change        Yes
4  Sunny  Warm   Normal  Strong  Warm     Same        Yes
Step 0 Hypothesis: ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
Step 1 Hypothesis: ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
Step 3 Hypothesis: ['Sunny', 'Warm', '?', 'Strong', '?', '?']
Step 4 Hypothesis: ['Sunny', 'Warm', '?', 'Strong', '?', '?']
Final
['Sunny', 'Warm', '?', 'Strong', '?', '?']
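If the CSV at the hard-coded path is unavailable, it can be recreated from the training table above (a sketch; the output filename is illustrative):

rows = [
    ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes'],
    ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes'],
    ['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No'],
    ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes'],
    ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes'],
]
cols = ['Sky', 'Temp', 'Humidity', 'Wind', 'Water', 'Forecast', 'EnjoySport']
pd.DataFrame(rows, columns=cols).to_csv('enjoysport_training_data.csv', index=False)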
Program 1
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
data = fetch_california_housing(as_frame=True)
housing_df = data.frame
housing_df=housing_df.dropna()
numerical_features = housing_df.select_dtypes(include=[np.number]).columns
plt.figure(figsize=(15,10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.histplot(housing_df[feature], bins=30, kde=True, color='blue')
    plt.title(f'Distribution of {feature}')
plt.tight_layout()
plt.show()
plt.figure(figsize=(15,10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.boxplot(housing_df[feature], color='orange')
    plt.title(f'Box plot of {feature}')
plt.tight_layout()
plt.show()
outliers_sum = {}
for feature in numerical_features:
    Q1 = housing_df[feature].quantile(0.25)
    Q3 = housing_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outlier = housing_df[(housing_df[feature] < lower_bound) |
                         (housing_df[feature] > upper_bound)]
    outliers_sum[feature] = len(outlier)
    print(f"{feature}: {len(outlier)} outliers")
Program 2
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
data = fetch_california_housing(as_frame=True)
housing_df = data.frame
comat = housing_df.corr()
plt.figure(figsize=(8,6))
sns.heatmap(comat,annot=True,cmap='coolwarm')
plt.show()
sns.pairplot(housing_df)
plt.show()
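A full pairplot over all 20,640 rows of the California housing frame is slow to render; sampling a subset first keeps it responsive (a sketch; the sample size is illustrative):

sns.pairplot(housing_df.sample(n=500, random_state=42))
plt.show()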
Program 3
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
iris = load_iris()
data = iris.data
labels = iris.target
label_names = iris.target_names
pca = PCA(n_components=2)
x_pca = pca.fit_transform(data)
df = pd.DataFrame(x_pca, columns= ['PC1','PC2'])
df['Labels'] = labels
colors = ['r','g','b']
plt.figure(figsize=(8,6))
for i, label in enumerate(np.unique(labels)):
    plt.scatter(df[df['Labels'] == label]['PC1'],
                df[df['Labels'] == label]['PC2'],
                c=colors[i], label=label_names[label])
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()
plt.show()
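A useful companion check is how much of the original variance the two components retain (a sketch using the fitted pca object above):

print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total variance retained:", pca.explained_variance_ratio_.sum())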