0% found this document useful (0 votes)
8 views14 pages

Machine Learning

The document contains multiple labs focused on machine learning techniques, including data visualization, regression, classification, and clustering. It covers various datasets such as California housing, iris, breast cancer, and others, showcasing methods like PCA, KNN, decision trees, and locally weighted regression. Each lab includes code snippets for data processing, model training, and evaluation, along with visualizations to illustrate the results.

Uploaded by

Nidhi Prabhu
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
8 views14 pages

Machine Learning

The document contains multiple labs focused on machine learning techniques, including data visualization, regression, classification, and clustering. It covers various datasets such as California housing, iris, breast cancer, and others, showcasing methods like PCA, KNN, decision trees, and locally weighted regression. Each lab includes code snippets for data processing, model training, and evaluation, along with visualizations to illustrate the results.

Uploaded by

Nidhi Prabhu
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

Machine Learning

lab---------1

# Lab 1: histograms, box plots and IQR-based outlier counts for every numeric
# column of the California housing dataset.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

data = fetch_california_housing(as_frame=True)
housing_df = data.frame
numerical_features = housing_df.select_dtypes(include=[np.number]).columns

# Three plots per row, with as many rows as needed to hold every column
# (ceiling division), so the grid never overflows if the column count changes.
n_cols = 3
n_rows = -(-len(numerical_features) // n_cols)

plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(n_rows, n_cols, i + 1)
    sns.histplot(housing_df[feature], kde=True, bins=30, color='blue')
    plt.title(f'Distribution of {feature}')
plt.tight_layout()
plt.show()

plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(n_rows, n_cols, i + 1)
    sns.boxplot(x=housing_df[feature], color='orange')
    plt.title(f'Box plot of {feature}')
plt.tight_layout()
plt.show()

print("outliers detection:")
outliers_summary = {}
for feature in numerical_features:
    # Tukey fences: the quartiles are the 25th and 75th percentiles, and the
    # fences sit 1.5 * IQR *outside* them (below Q1, above Q3).
    Q1 = housing_df[feature].quantile(0.25)
    Q3 = housing_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    outliers = housing_df[
        (housing_df[feature] < lower_bound) | (housing_df[feature] > upper_bound)
    ]
    outliers_summary[feature] = len(outliers)
    print(f"{feature}: {len(outliers)} outliers")

print("\n dataset summary:")
print(housing_df.describe())

lab---2

# Lab 2: correlation matrix, heatmap and pair plot for the California housing
# features together with the target value.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing

california_housing = fetch_california_housing()
data = pd.DataFrame(
    california_housing.data,
    columns=california_housing.feature_names,
)
data['medHouse val'] = california_housing.target

correlation_matrix = data.corr()
print("Correlation matrix :")
print(correlation_matrix)

plt.figure(figsize=(10, 8))
# `linewidths` is the keyword seaborn documents for the cell separator width.
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    fmt='.2f',
    linewidths=0.5,
)
plt.title('correlation matrix Heatmap')
plt.show()

# pairplot creates its own figure; the suptitle is lifted slightly (y > 1) so
# it does not overlap the top row of axes.
sns.pairplot(data)
plt.suptitle('pair plot of numerical Features', y=1.02)
plt.show()

lab-------3

# Lab 3: project the standardised iris measurements onto their first two
# principal components and plot them coloured by species.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

iris = load_iris()
x = iris.data
y = iris.target
target_names = iris.target_names

# PCA is scale-sensitive, so standardise each feature first.
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

pca = PCA(n_components=2)
x_pca = pca.fit_transform(x_scaled)

df_pca = pd.DataFrame(x_pca, columns=['pc1', 'pc2'])
df_pca["target"] = y

# Colour by species *name* so the legend entries come straight from the data;
# overriding labels afterwards with plt.legend(labels=...) can attach the
# names to the wrong handles.
species = df_pca['target'].map(dict(enumerate(target_names)))

plt.figure(figsize=(10, 6))
sns.scatterplot(
    x=df_pca['pc1'],
    y=df_pca['pc2'],
    hue=species,
    palette='viridis',
    legend=True,
)
plt.xlabel("principal component1")
plt.ylabel("principal component2")
plt.title("PCA on iris dataset(40+20)")
plt.legend(title="species")
plt.show()
lab-----------4

import pandas as pd

def find_s_algorithm(training_data, target_column='play tennis',
                     positive_label='Yes'):
    """Return the most specific hypothesis consistent with the positive rows.

    Implements Find-S: start from the first positive example and replace an
    attribute with ``'?'`` whenever a later positive example disagrees on it.
    Negative examples are ignored.

    Args:
        training_data: DataFrame holding the attribute columns plus the
            target column.
        target_column: Name of the column holding the class label.
        positive_label: Value in ``target_column`` marking a positive example.

    Returns:
        A 1-D object array with one entry per attribute column (an attribute
        value or ``'?'``), or ``None`` when there is no positive example.
    """
    positives = training_data[training_data[target_column] == positive_label]
    # Drop the label by name so the result does not depend on the target
    # being the last column; object dtype lets '?' be stored in any column.
    attributes = positives.drop(columns=[target_column]).to_numpy(dtype=object)

    hypothesis = None
    for instance in attributes:
        if hypothesis is None:
            hypothesis = instance.copy()
            continue
        # Generalise every attribute on which this example disagrees.
        hypothesis[hypothesis != instance] = '?'

    return hypothesis

# NOTE(review): the CSV file name was lost when this document was extracted
# (it appears only as a placeholder); point TRAINING_CSV at the PlayTennis
# training file before running.
TRAINING_CSV = 'training_data.csv'

df = pd.read_csv(TRAINING_CSV)
# Normalise the header so the label column is the one find_s_algorithm reads.
df.columns = ['outlook', 'Temperature', 'Humidity', 'Wind', 'play tennis']

final_hypothesis = find_s_algorithm(df)
print("Final Hypothesis:", final_hypothesis)
lab----------------------5

# Lab 5: label the first 50 of 100 random points by a 0.5 threshold, then
# classify the remaining 50 with k-NN for several values of k.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

np.random.seed(42)
x = np.random.rand(100, 1)
# Index column 0 so each xi is a scalar rather than a 1-element array.
y = np.array(['class1' if xi <= 0.5 else 'class2' for xi in x[:50, 0]])

k_values = [1, 2, 3, 4, 5, 20, 30]
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x[:50], y)
    y_pred = knn.predict(x[50:])
    print(f"\n Result for k={k}:")
    # Points are reported 1-based, so the first unlabelled point is x51.
    for i, label in enumerate(y_pred, start=51):
        print(f"x{i}={x[i-1][0]:.3f}-> classified as {label}")

plt.figure(figsize=(8, 5))
plt.scatter(
    x[:50],
    np.zeros(50),
    c=['blue' if yi == 'class1' else 'red' for yi in y],
    label="labelled Data",
)
# The unlabelled points are the second half of x, not the labelled half again.
plt.scatter(
    x[50:],
    np.zeros(50),
    c='black',
    marker='x',
    label="unlabelled Data",
)
plt.xlabel('x values')
plt.ylabel('classification')
plt.title('KNN classification of Random Data points')
plt.legend()
plt.show()

lab----------------------6

# Lab 6: locally weighted regression on a noisy sine curve.
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)
x = np.linspace(-3, 3, 100)
y = np.sin(x) + np.random.normal(0, 0.1, 100)
# Design matrix with a leading column of ones for the intercept term.
x_matrix = np.c_[np.ones(x.shape[0]), x]
def get_weights(query_point, x, tau):
    """Return the Gaussian kernel weight of every sample in ``x``.

    Args:
        query_point: Location at which a prediction is wanted.
        x: 1-D array of sample locations.
        tau: Kernel bandwidth; smaller values make the fit more local.

    Returns:
        Array shaped like ``x`` holding
        ``exp(-(x - query_point)**2 / (2 * tau**2))``.
    """
    return np.exp(-((x - query_point) ** 2) / (2 * tau ** 2))


def locally_weighted_regression(query_x, x, y, tau):
    """Predict ``y`` at ``query_x`` with a kernel-weighted straight-line fit.

    Args:
        query_x: Scalar location to predict at.
        x: 1-D array of training inputs.
        y: 1-D array of training targets, same length as ``x``.
        tau: Gaussian kernel bandwidth passed to ``get_weights``.

    Returns:
        The fitted line ``theta0 + theta1 * query_x`` evaluated at ``query_x``.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # Build the design matrix from the x that was passed in, so the function
    # works on any data set instead of silently using a module-level matrix.
    design = np.column_stack((np.ones_like(x), x))

    # Scaling the rows by the weights equals W @ design for a diagonal W,
    # without materialising the n-by-n diagonal matrix.
    weights = get_weights(query_x, x, tau)
    weighted_design = design * weights[:, np.newaxis]

    # pinv keeps the solve defined when a tiny tau leaves too few effective
    # points and the 2x2 normal matrix becomes singular.
    theta = np.linalg.pinv(design.T @ weighted_design) @ (weighted_design.T @ y)
    return np.array([1.0, query_x]) @ theta
# Fit and plot one locally weighted regression curve per bandwidth, then
# overlay the noisy samples for comparison.
tau_values = [0.1, 0.3, 1.0]
plt.figure(figsize=(10, 6))
for tau in tau_values:
    # One local fit per query point; the training inputs double as queries.
    y_pred = np.array(
        [locally_weighted_regression(xi, x, y, tau) for xi in x]
    )
    plt.plot(x, y_pred, label=f"Tau={tau}")

plt.scatter(x, y, color="black", label="original data", alpha=0.5)
plt.xlabel("X")
plt.ylabel("Y")
plt.title("Locally Weighted Regression with Different Tau Values")
plt.legend()
plt.show()

lab----------------------7

# Lab 7: degree-2 polynomial regression of fuel efficiency (mpg) on
# horsepower for the Auto MPG dataset.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the dataset.
# NOTE(review): the file name at the end of this path was lost when the
# document was extracted; "auto-mpg.csv" is a placeholder - confirm it.
file_path = r"C:\Users\ML-34\Downloads\auto-mpg.csv"
df_auto_mpg = pd.read_csv(file_path)

# Convert 'horsepower' to numeric; unparseable entries become NaN.
df_auto_mpg["horsepower"] = pd.to_numeric(
    df_auto_mpg["horsepower"], errors="coerce"
)

# Drop only the rows missing a value the model uses, so gaps in unrelated
# columns do not discard usable samples.
df_auto_mpg.dropna(subset=["horsepower", "mpg"], inplace=True)

# Define the feature and target variables.
x_poly = df_auto_mpg[["horsepower"]].astype(float)  # Feature: horsepower
y_poly = df_auto_mpg["mpg"]  # Target: mpg

# Split the data into training and testing sets.
x_train, x_test, y_train, y_test = train_test_split(
    x_poly, y_poly, test_size=0.2, random_state=42
)

# Apply the polynomial feature transformation (degree 2). The transformer is
# fitted on the training split only and re-used on the test split.
poly = PolynomialFeatures(degree=2)
x_train_poly = poly.fit_transform(x_train)
x_test_poly = poly.transform(x_test)

# Train the polynomial regression model.
poly_reg = LinearRegression()
poly_reg.fit(x_train_poly, y_train)

# Make predictions on the test set.
y_pred_poly = poly_reg.predict(x_test_poly)

# Calculate the mean squared error (MSE).
mse_poly = mean_squared_error(y_test, y_pred_poly)
print(f"Polynomial Regression (Degree 2) MSE: {mse_poly}")

# Sort the test data by horsepower so the fitted curve is drawn left to
# right instead of zig-zagging between unordered points.
sorted_indices = np.argsort(x_test.values.flatten())
x_test_sorted = x_test.values.flatten()[sorted_indices]
y_test_sorted = y_test.values.flatten()[sorted_indices]
y_pred_sorted = y_pred_poly[sorted_indices]

# Plot the results.
plt.figure(figsize=(8, 5))
plt.scatter(x_test, y_test, color="blue", label="Actual MPG")
plt.plot(
    x_test_sorted,
    y_pred_sorted,
    color="red",
    linewidth=2,
    label="Polynomial Regression (Degree 2)",
)
plt.xlabel("Horsepower")
plt.ylabel("MPG (Fuel Efficiency)")
plt.title("Polynomial Regression on Auto MPG Dataset")
plt.legend()
plt.show()

lab--------------------8
# Lab 8: decision-tree classification of the scikit-learn breast cancer
# dataset, with a single-sample prediction and a plot of the fitted tree.
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
import matplotlib.pyplot as plt

data = load_breast_cancer()
x = data.data
y = data.target

# Split into training and testing sets.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

# Initialize and train the decision tree classifier.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(x_train, y_train)

# Make predictions.
y_pred = clf.predict(x_test)

# Calculate accuracy.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of Decision Tree Classifier: {accuracy * 100:.2f}%')

# Predict a new sample: one row with one value per dataset feature (30).
new_sample = np.array([[
    14.3, 18.1, 92.8, 0.097, 0.084, 0.083, 0.080, 0.077, 0.064, 0.087,
    0.087, 0.077, 0.089, 0.085, 0.085, 0.080, 0.095, 0.087, 0.090, 0.091,
    0.080, 0.076, 0.080, 0.092, 0.079, 0.089, 0.090, 0.091, 0.089, 0.080,
]])
new_prediction = clf.predict(new_sample)

# Class 1 is reported as benign and anything else as malignant.
predicted_label = "Benign" if new_prediction[0] == 1 else "Malignant"
print(f'Predicted class for the new sample: {predicted_label}')

# Plot the decision tree.
plt.figure(figsize=(12, 8))
tree.plot_tree(
    clf,
    filled=True,
    feature_names=data.feature_names,
    class_names=data.target_names,
)
plt.show()

lab-----------------------------9
# Lab 9: Gaussian naive Bayes classification of the Olivetti faces dataset.
import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

data = fetch_olivetti_faces(shuffle=True, random_state=42)
x = data.data
y = data.target

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

nb_classifier = GaussianNB()
nb_classifier.fit(x_train, y_train)

y_pred = nb_classifier.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
# ".2f" prints two decimals; the previous ":2f" spec meant "minimum width 2"
# and printed six decimals.
print(f'Accuracy of the Niave Bayes Classifier:{accuracy * 100:.2f}%')

# Classify one held-out image; reshape turns the flat vector into the
# single-row 2-D array that predict() expects.
sample_index = 0
new_sample = x_test[sample_index].reshape(1, -1)
new_prediction = nb_classifier.predict(new_sample)
print(f'Predicted person ID for the new sample:{new_prediction[0]}')

lab----------------------10
# Lab 10: k-means clustering (k=2) of the Wisconsin breast cancer data,
# visualised in the plane of the first two principal components.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

df = pd.read_csv("Prog 10-load_breast_cancer_wisconson.csv")
x = df.drop(columns=['target']).values
y = df['target'].values

# k-means is distance based, so standardise the features first.
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

# n_init is pinned because its default differs between scikit-learn
# releases; fixing it keeps the clustering reproducible.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
y_kmeans = kmeans.fit_predict(x_scaled)

pca = PCA(n_components=2)
x_pca = pca.fit_transform(x_scaled)

# NOTE(review): k-means cluster ids are arbitrary, so the Malignant/Benign
# names below are not guaranteed to match the true classes - compare
# y_kmeans against y before relying on them.
plt.figure(figsize=(8, 6))
plt.scatter(
    x_pca[y_kmeans == 0, 0],
    x_pca[y_kmeans == 0, 1],
    s=50,
    c="red",
    label='cluster1(Malignant)',
)
plt.scatter(
    x_pca[y_kmeans == 1, 0],
    x_pca[y_kmeans == 1, 1],
    s=50,
    c="blue",
    label='cluster2(Benign)',
)

# The centroids live in the scaled feature space; project them with the same
# fitted PCA so they land on the plotted axes.
centers = kmeans.cluster_centers_
centers_pca = pca.transform(centers)
plt.scatter(
    centers_pca[:, 0],
    centers_pca[:, 1],
    s=200,
    c="yellow",
    marker='x',
    label="centeroids",
)
plt.xlabel("Principal component1")
plt.ylabel("Principal component2")
plt.title("kmeans clustering on Breast cancer Dataset(PCA)")
plt.legend()
plt.show()

You might also like