Machine Learning Lab Programs

Lab 1: Exploratory data analysis of the California Housing dataset
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Load the California Housing dataset as a DataFrame
data = fetch_california_housing(as_frame=True)
housing_df = data.frame

# Select the numerical columns
numerical_features = housing_df.select_dtypes(include=[np.number]).columns

# Histogram (with KDE) for each numerical feature
plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.histplot(housing_df[feature], kde=True, bins=30, color='blue')
    plt.title(f'Distribution of {feature}')
plt.tight_layout()
plt.show()

# Box plot for each numerical feature
plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.boxplot(x=housing_df[feature], color='orange')
    plt.title(f'Box plot of {feature}')
plt.tight_layout()
plt.show()

# Outlier detection using the 1.5 * IQR rule
print("Outlier detection:")
outliers_summary = {}
for feature in numerical_features:
    Q1 = housing_df[feature].quantile(0.25)
    Q3 = housing_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outliers = housing_df[(housing_df[feature] < lower_bound) | (housing_df[feature] > upper_bound)]
    outliers_summary[feature] = len(outliers)
    print(f"{feature}: {len(outliers)} outliers")

print("\nDataset summary:")
print(housing_df.describe())
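
For reference, the rule implemented above flags a value x as an outlier exactly when

    x < Q1 - 1.5 * IQR   or   x > Q3 + 1.5 * IQR,   where IQR = Q3 - Q1.

For example, with Q1 = 2 and Q3 = 4, IQR = 2 and the acceptable range is [-1, 7].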

Lab 2: Correlation heatmap and pair plot for the California Housing dataset
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing

# Load the dataset and rebuild it as a DataFrame with the target attached
california_housing = fetch_california_housing()
data = pd.DataFrame(california_housing.data, columns=california_housing.feature_names)
data['MedHouseVal'] = california_housing.target

# Compute and print the correlation matrix
correlation_matrix = data.corr()
print("Correlation matrix:")
print(correlation_matrix)

# Heatmap of the correlation matrix
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.title('Correlation Matrix Heatmap')
plt.show()

# Pair plot of all numerical features
sns.pairplot(data)
plt.suptitle('Pair Plot of Numerical Features', y=1.02)
plt.show()
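
As a small follow-up (not part of the original listing), the most commonly read row of the matrix can be pulled out directly; MedHouseVal is the target column attached above:

# Sketch: rank features by their correlation with the median house value
print(correlation_matrix['MedHouseVal'].drop('MedHouseVal').sort_values(ascending=False))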

Lab 3: Principal Component Analysis on the Iris dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load the Iris dataset
iris = load_iris()
x = iris.data
y = iris.target
target_names = iris.target_names

# Standardize the features before PCA
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

# Project onto the first two principal components
pca = PCA(n_components=2)
x_pca = pca.fit_transform(x_scaled)
df_pca = pd.DataFrame(x_pca, columns=['pc1', 'pc2'])
df_pca['target'] = y

# Scatter plot of the two components, colored by class
plt.figure(figsize=(10, 6))
sns.scatterplot(x=df_pca['pc1'], y=df_pca['pc2'], hue=df_pca['target'], palette='viridis', legend=True)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('PCA on the Iris Dataset')
plt.legend(labels=target_names)
plt.show()
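
An optional check (not in the original listing) shows how much of the variance the two components retain; explained_variance_ratio_ is a standard attribute of a fitted PCA:

# Sketch: variance retained by the two principal components
print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total variance retained:", pca.explained_variance_ratio_.sum())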

Lab 4: Find-S algorithm
import pandas as pd

def find_s_algorithm(training_data):
    # Find-S: start from the first positive example and generalize
    # the hypothesis only on positive examples
    hypothesis = None
    for index, row in training_data.iterrows():
        if row['PlayTennis'] == 'Yes':
            instance = row[:-1].values
            if hypothesis is None:
                # The first positive example becomes the initial hypothesis
                hypothesis = instance.copy()
            else:
                # Replace every attribute that disagrees with '?'
                for i in range(len(hypothesis)):
                    if hypothesis[i] != instance[i]:
                        hypothesis[i] = '?'
    return hypothesis
df = pd.read_csv('training_data.csv')  # filename assumed; the original listing omits it
df.columns = ['Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis']
final_hypothesis = find_s_algorithm(df)
print("Final Hypothesis:", final_hypothesis)

Lab 5: k-Nearest Neighbors classification of random points
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

# Generate 100 random points in [0, 1]; only the first 50 get labels (x <= 0.5 -> class1)
np.random.seed(42)
x = np.random.rand(100, 1)
y = np.array(['class1' if xi <= 0.5 else 'class2' for xi in x[:50]])

# Classify the remaining 50 points with KNN for several values of k
k_values = [1, 2, 3, 4, 5, 20, 30]
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x[:50], y)
    y_pred = knn.predict(x[50:])
    print(f"\nResults for k={k}:")
    for i, label in enumerate(y_pred, start=51):
        print(f"x{i}={x[i-1][0]:.3f} -> classified as {label}")

# Plot labelled points (colored, at y=0) and unlabelled points (black x, at y=1)
plt.figure(figsize=(8, 5))
plt.scatter(x[:50], np.zeros(50), c=['blue' if yi == 'class1' else 'red' for yi in y],
            label="Labelled data")
plt.scatter(x[50:], np.ones(50), c='black', marker='x', label="Unlabelled data")
plt.xlabel('x values')
plt.ylabel('Classification')
plt.title('KNN Classification of Random Data Points')
plt.legend()
plt.show()
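
Because the labelling rule is known, a quick sketch (not in the original) can score the last k's predictions against it:

# Sketch: check the final k's predictions against the true rule x <= 0.5 -> class1
true_labels = np.array(['class1' if xi <= 0.5 else 'class2' for xi in x[50:]])
print(f"Accuracy for k={k}: {np.mean(y_pred == true_labels) * 100:.1f}%")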

Lab 6: Locally Weighted Regression
import numpy as np
import matplotlib.pyplot as plt

# Noisy sine data
np.random.seed(42)
x = np.linspace(-3, 3, 100)
y = np.sin(x) + np.random.normal(0, 0.1, 100)

# Design matrix with a bias column
x_matrix = np.c_[np.ones(x.shape[0]), x]

def get_weights(query_point, x, tau):
    # Gaussian kernel: points near the query get weight close to 1
    return np.exp(-((x - query_point) ** 2) / (2 * tau ** 2))

def locally_weighted_regression(query_x, x, y, tau):
    # Solve the weighted normal equations for this query point
    w = np.diag(get_weights(query_x, x, tau))
    theta = np.linalg.pinv(x_matrix.T @ w @ x_matrix) @ (x_matrix.T @ w @ y)
    return np.array([1, query_x]) @ theta

# Fit and plot for several bandwidths
tau_values = [0.1, 0.3, 1.0]
plt.figure(figsize=(10, 6))
for tau in tau_values:
    y_pred = np.array([locally_weighted_regression(xi, x, y, tau) for xi in x])
    plt.plot(x, y_pred, label=f"Tau={tau}")
plt.scatter(x, y, color="black", label="Original data", alpha=0.5)
plt.xlabel("X")
plt.ylabel("Y")
plt.title("Locally Weighted Regression with Different Tau Values")
plt.legend()
plt.show()
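
For each query point x_q, the function above solves weighted least squares in closed form (via the pseudo-inverse for numerical safety):

    w_i = exp(-(x_i - x_q)^2 / (2 * tau^2)),    W = diag(w_1, ..., w_n)
    theta = (X^T W X)^(-1) X^T W y,             y_hat(x_q) = [1, x_q] . theta

A small tau weights only the nearest points, giving a tight local fit; a large tau weights all points almost equally and approaches ordinary linear regression.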

Lab 7: Polynomial regression on the Auto MPG dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Load the dataset (local path; the filename is assumed, the original listing omits it)
file_path = r"C:\Users\ML-34\Downloads\auto-mpg.csv"
df_auto_mpg = pd.read_csv(file_path)
# Convert 'horsepower' to numeric and coerce errors to NaN
df_auto_mpg["horsepower"] = pd.to_numeric(df_auto_mpg["horsepower"], errors="coerce")
# Drop rows with NaN values
df_auto_mpg.dropna(inplace=True)
# Define the feature and target variables
x_poly = df_auto_mpg[["horsepower"]].astype(float)  # Feature: horsepower
y_poly = df_auto_mpg["mpg"]  # Target: mpg
# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x_poly, y_poly, test_size=0.2, random_state=42)
# Apply polynomial feature transformation (degree 2)
poly = PolynomialFeatures(degree=2)
x_train_poly = poly.fit_transform(x_train)
x_test_poly = poly.transform(x_test)
# Train the polynomial regression model
poly_reg = LinearRegression()
poly_reg.fit(x_train_poly, y_train)
# Make predictions on the test set
y_pred_poly = poly_reg.predict(x_test_poly)
# Calculate Mean Squared Error (MSE)
mse_poly = mean_squared_error(y_test, y_pred_poly)
print(f"Polynomial Regression (Degree 2) MSE: {mse_poly}")
# Sort the test data for plotting purposes
sorted_indices = np.argsort(x_test.to_numpy().flatten())
x_test_sorted = x_test.to_numpy().flatten()[sorted_indices]
y_test_sorted = y_test.to_numpy()[sorted_indices]
y_pred_sorted = y_pred_poly[sorted_indices]
# Plot the results
plt.figure(figsize=(8, 5))
plt.scatter(x_test, y_test, color="blue", label="Actual MPG")
plt.plot(x_test_sorted, y_pred_sorted, color="red", linewidth=2,
         label="Polynomial Regression (Degree 2)")
plt.xlabel("Horsepower")
plt.ylabel("MPG (Fuel Efficiency)")
plt.title("Polynomial Regression on Auto MPG Dataset")
plt.legend()
plt.show()
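
If the local CSV is not available, seaborn ships a copy of the same Auto MPG data with matching 'horsepower' and 'mpg' columns; a sketch of the substitution (requires network access on first call):

# Sketch: load Auto MPG from seaborn's bundled datasets instead of a local file
import seaborn as sns
df_auto_mpg = sns.load_dataset("mpg")

The to_numeric, dropna, and modeling steps above then run unchanged.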

Lab 8: Decision tree classifier on the Breast Cancer dataset
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
import matplotlib.pyplot as plt

# Load the Breast Cancer dataset
data = load_breast_cancer()
x = data.data
y = data.target
# Split into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
# Initialize and train the Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(x_train, y_train)
# Make predictions
y_pred = clf.predict(x_test)
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of Decision Tree Classifier: {accuracy * 100:.2f}%')
# Predict a new sample (30 feature values, one per dataset feature)
new_sample = np.array([[14.3, 18.1, 92.8, 0.097, 0.084, 0.083, 0.080, 0.077,
                        0.064, 0.087, 0.087, 0.077, 0.089, 0.085, 0.085, 0.080,
                        0.095, 0.087, 0.090, 0.091, 0.080, 0.076, 0.080, 0.092,
                        0.079, 0.089, 0.090, 0.091, 0.089, 0.080]])
new_prediction = clf.predict(new_sample)
print(f'Predicted class for the new sample: {"Benign" if new_prediction[0] == 1 else "Malignant"}')
# Plot the decision tree
plt.figure(figsize=(12, 8))
tree.plot_tree(clf, filled=True, feature_names=data.feature_names,
               class_names=data.target_names)
plt.show()
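
A text dump of the fitted tree is often easier to scan than the plot; export_text is part of sklearn.tree:

# Sketch: print the tree's decision rules as indented text
from sklearn.tree import export_text
print(export_text(clf, feature_names=list(data.feature_names)))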

Lab 9: Gaussian Naive Bayes on the Olivetti Faces dataset
import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the Olivetti Faces dataset (400 images of 40 people)
data = fetch_olivetti_faces(shuffle=True, random_state=42)
x = data.data
y = data.target

# Split into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

# Train a Gaussian Naive Bayes classifier
nb_classifier = GaussianNB()
nb_classifier.fit(x_train, y_train)

# Evaluate on the test set
y_pred = nb_classifier.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of the Naive Bayes Classifier: {accuracy * 100:.2f}%')

# Classify a single test image
sample_index = 0
new_sample = x_test[sample_index].reshape(1, -1)
new_prediction = nb_classifier.predict(new_sample)
print(f'Predicted person ID for the new sample: {new_prediction[0]}')
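
Olivetti images are 64x64 grayscale, so the classified sample can be displayed alongside its predicted ID; a short optional sketch:

# Sketch: show the face that was just classified
import matplotlib.pyplot as plt
plt.imshow(x_test[sample_index].reshape(64, 64), cmap='gray')
plt.title(f'Predicted person ID: {new_prediction[0]}')
plt.axis('off')
plt.show()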

Lab 10: K-Means clustering on the Breast Cancer dataset (PCA visualization)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Load the course-provided CSV
df = pd.read_csv("Prog 10-load_breast_cancer_wisconson.csv")
x = df.drop(columns=['target']).values
y = df['target'].values

# Standardize the features
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

# Cluster into two groups with K-Means
kmeans = KMeans(n_clusters=2, random_state=42)
y_kmeans = kmeans.fit_predict(x_scaled)

# Project the data to 2D with PCA for plotting
pca = PCA(n_components=2)
x_pca = pca.fit_transform(x_scaled)

plt.figure(figsize=(8, 6))
plt.scatter(x_pca[y_kmeans == 0, 0], x_pca[y_kmeans == 0, 1], s=50, c="red",
            label='Cluster 1 (Malignant)')
plt.scatter(x_pca[y_kmeans == 1, 0], x_pca[y_kmeans == 1, 1], s=50, c="blue",
            label='Cluster 2 (Benign)')

# Project the cluster centers into the same PCA space
centers = kmeans.cluster_centers_
centers_pca = pca.transform(centers)
plt.scatter(centers_pca[:, 0], centers_pca[:, 1], s=200, c="yellow", marker='x',
            label="Centroids")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.title("K-Means Clustering on the Breast Cancer Dataset (PCA)")
plt.legend()
plt.show()
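
If the course CSV is missing, an equivalent DataFrame with the expected 'target' column can be rebuilt from scikit-learn's bundled copy of the dataset; a sketch:

# Sketch: reconstruct the expected DataFrame without the CSV
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
df = pd.DataFrame(cancer.data, columns=cancer.feature_names)
df['target'] = cancer.target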