ML Lab - Exp1-10

The document outlines various machine learning experiments, including data visualization techniques such as histograms and box plots, as well as algorithms like Principal Component Analysis (PCA), k-Nearest Neighbors (KNN), and decision trees. It details steps for loading datasets, performing data analysis, and evaluating model performance using metrics like accuracy and mean squared error. Additionally, it includes code snippets for implementing these techniques using Python libraries such as pandas, numpy, and scikit-learn.


#EXP-1: Create histograms, box plots, and detect outliers

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Step 1: Load the California Housing dataset
data = fetch_california_housing(as_frame=True)
housing_df = data.frame

# Step 2: Identify the numerical features
numerical_features = housing_df.select_dtypes(include=[np.number]).columns

# Step 3: Plot histograms
plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.histplot(housing_df[feature], kde=True, bins=30, color='blue')
    plt.title(f'Distribution of {feature}')
plt.tight_layout()
plt.show()

# Step 4: Generate box plots for numerical features
plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.boxplot(x=housing_df[feature], color='orange')
    plt.title(f'Box Plot of {feature}')
plt.tight_layout()
plt.show()

# Step 5 (Optional): Count outliers using the 1.5 * IQR rule
print("Outliers Detection:")
outliers_summary = {}
for feature in numerical_features:
    Q1 = housing_df[feature].quantile(0.25)
    Q3 = housing_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outliers = housing_df[(housing_df[feature] < lower_bound) | (housing_df[feature] > upper_bound)]
    outliers_summary[feature] = len(outliers)
    print(f"{feature}: {len(outliers)} outliers")

# Step 6 (Optional): Print a summary of the dataset
print("\nDataset Summary:")
print(housing_df.describe())
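The listing above only counts the outliers. If a cleaned copy of the data is also wanted, a minimal optional sketch using the same 1.5 * IQR rule (and the housing_df and numerical_features names defined above) could look like this:

# Optional extension (not part of the original experiment):
# drop rows that are IQR outliers in any numerical feature.
filtered_df = housing_df.copy()
for feature in numerical_features:
    Q1 = filtered_df[feature].quantile(0.25)
    Q3 = filtered_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    filtered_df = filtered_df[filtered_df[feature].between(Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)]
print(f"Rows before: {len(housing_df)}, after outlier removal: {len(filtered_df)}")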
#EXP-2: Correlation matrix and pair plot

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Step 1: Load the California Housing dataset
california_data = fetch_california_housing(as_frame=True)

# Step 2: Convert to a DataFrame
data = california_data.frame

# Step 3: Compute the correlation matrix
correlation_matrix = data.corr()

# Step 4: Visualize the correlation matrix using a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.title('Correlation Matrix of California Housing Features')
plt.show()

# Step 5: Create a pair plot to visualize pairwise relationships
sns.pairplot(data, diag_kind='kde', plot_kws={'alpha': 0.5})
plt.suptitle('Pair Plot of California Housing Features', y=1.02)
plt.show()
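As an optional follow-up to the heatmap, the features can be ranked by their correlation with the target; this sketch assumes the target column in the as_frame output is named 'MedHouseVal', which is the scikit-learn default for this dataset.

# Optional: rank features by correlation with the target (assumes the column is named 'MedHouseVal')
print(correlation_matrix['MedHouseVal'].sort_values(ascending=False))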
#EXP-3: Principal Component Analysis (PCA)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

# Step 1: Load the Iris dataset
iris = load_iris()
data = iris.data
labels = iris.target
label_names = iris.target_names

# Step 2: Convert to a DataFrame for better visualization
iris_df = pd.DataFrame(data, columns=iris.feature_names)

# Step 3: Perform PCA to reduce dimensionality to 2 components
pca = PCA(n_components=2)
data_reduced = pca.fit_transform(data)

# Step 4: Create a DataFrame for the reduced data
reduced_df = pd.DataFrame(data_reduced, columns=['Principal Component 1', 'Principal Component 2'])
reduced_df['Label'] = labels

# Step 5: Visualize the 2D PCA result
plt.figure(figsize=(8, 6))
colors = ['r', 'g', 'b']
for i, label in enumerate(np.unique(labels)):
    plt.scatter(
        reduced_df[reduced_df['Label'] == label]['Principal Component 1'],
        reduced_df[reduced_df['Label'] == label]['Principal Component 2'],
        label=label_names[label],
        color=colors[i]
    )
plt.title('PCA on Iris Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.grid()
plt.show()
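A small optional addition, not in the original listing: report how much of the variance the two principal components retain, using the fitted pca object from above.

# Optional: variance captured by the two principal components
print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total variance captured:", pca.explained_variance_ratio_.sum())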
#EXP-4: Find-S algorithm

import pandas as pd

def find_s_algorithm(data_file):
    # Step 1: Load the training data from CSV
    data = pd.read_csv(data_file)

    # Step 2: Print the training data
    print("data_file:")
    print(data)

    # Step 3: Identify the feature columns and the target attribute
    # (assuming the last column is the target)
    target_column = data.columns[-1]
    features = data.columns[:-1]

    # Step 4: Keep only the positive examples
    # (assuming 'Yes' represents positive examples)
    positive_data = data[data[target_column] == 'Yes']

    # Step 5: Initialize the hypothesis with the first positive example
    hypothesis = positive_data.iloc[0, :-1].values

    # Step 6: Generalize the hypothesis over all positive examples
    for i, example in positive_data.iterrows():
        for j in range(len(hypothesis)):
            if hypothesis[j] != example[features[j]]:
                # Use '?' to generalize when attribute values disagree
                hypothesis[j] = '?'

    # Step 7: Return the final hypothesis
    return hypothesis

# Step 8: Path to the CSV file
data_file = r'C:/Users/91943/Desktop/KITM/ML LAB/training_data.csv'

# Step 9: Run the algorithm
final_hypothesis = find_s_algorithm(data_file)

# Step 10: Print the final hypothesis
print("\nFinal Hypothesis:", final_hypothesis)
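The script reads training_data.csv, but the manual does not show the file's contents. A hypothetical example in the expected format (attribute columns followed by a 'Yes'/'No' target in the last column), in the style of the classic EnjoySport data, is sketched below; the columns and values are an illustrative assumption only.

# Hypothetical training_data.csv (illustrative only; the actual file is not shown in the manual):
# Sky,AirTemp,Humidity,Wind,Water,Forecast,EnjoySport
# Sunny,Warm,Normal,Strong,Warm,Same,Yes
# Sunny,Warm,High,Strong,Warm,Same,Yes
# Rainy,Cold,High,Strong,Warm,Change,No
# Sunny,Warm,High,Strong,Cool,Change,Yes
# For this data, Find-S would output: ['Sunny' 'Warm' '?' 'Strong' '?' '?']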
#EXP-5: k-Nearest Neighbour (KNN) algorithm

import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

# Step 1: Generate 100 random x values in the range [0, 1]
x = np.random.rand(100).reshape(-1, 1)

# Step 2: Assign labels to the first 50 values (Class 1 if x <= 0.5, else Class 2)
y = np.array(['Class 1' if xi <= 0.5 else 'Class 2' for xi in x[:50].flatten()])

# Step 3: Prepare training and test data
X_train = x[:50]
X_test = x[50:]

# Step 4: Choose the k values to test
k_values = [1, 2, 3, 4, 5, 20, 30]
results = {}

# Step 5: Apply KNN for each k
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y)
    y_pred = knn.predict(X_test)
    # Dictionary mapping each k to the predicted classes for the test points x[50:]
    results[k] = y_pred

# Step 6: Display results
for k in k_values:
    print(f"\nK = {k} classification results:")
    for i, pred in enumerate(results[k]):
        print(f"x{51 + i}: {X_test[i][0]:.3f} -> {pred}")

# Step 7 (Optional): Plot for visual understanding
plt.figure(figsize=(10, 6))
plt.scatter(X_train, [0] * 50, c=['red' if yi == 'Class 1' else 'blue' for yi in y], label='Training Data')
for k in k_values:
    y_pred_k = results[k]
    plt.scatter(X_test, [k] * 50, c=['red' if yi == 'Class 1' else 'blue' for yi in y_pred_k],
                marker='x', label=f'Test (k={k})')
plt.yticks(k_values + [0])
plt.xlabel("x value")
plt.ylabel("k (for visualization only)")
plt.title("KNN Classification of x values")
plt.legend()
plt.grid(True)
plt.show()
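The experiment only prints the predicted classes. Because the labelling rule (Class 1 when x <= 0.5) is known, an optional check, not in the original listing, can score each k against that rule:

# Optional: compare predictions with the known labelling rule (x <= 0.5 -> 'Class 1')
y_test_true = np.array(['Class 1' if xi <= 0.5 else 'Class 2' for xi in X_test.flatten()])
for k in k_values:
    acc = np.mean(results[k] == y_test_true)
    print(f"k = {k}: accuracy = {acc:.2f}")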
#EXP-6: Locally Weighted Regression (LWR) algorithm

import numpy as np
import matplotlib.pyplot as plt

# Step 1: Generate a sample nonlinear dataset
np.random.seed(42)
X = np.linspace(-3, 3, 100).reshape(-1, 1)
y = np.sin(X).ravel() + np.random.normal(scale=0.1, size=X.shape[0])

# Step 2: Locally Weighted Regression function
def locally_weighted_regression(x_query, X_train, y_train, tau):
    # Step 3: Compute weights using a Gaussian kernel
    W = np.exp(-((X_train - x_query) ** 2) / (2 * tau ** 2))

    # Step 4: Add a bias (intercept) term for linear regression
    X_bias = np.c_[np.ones_like(X_train), X_train]

    # Step 5: Weighted linear regression: theta = (X^T W X)^(-1) X^T W y
    theta = np.linalg.pinv(X_bias.T @ np.diag(W.ravel()) @ X_bias) @ X_bias.T @ np.diag(W.ravel()) @ y_train

    # Predict y for x_query
    return np.array([1, x_query]) @ theta

# Step 6: Predict over a dense grid
X_test = np.linspace(-3, 3, 100)
y_pred = np.array([locally_weighted_regression(x, X, y, tau=0.5) for x in X_test])

# Step 7: Plot the result
plt.scatter(X, y, color="gray", alpha=0.5, label="Training Data")
plt.plot(X_test, y_pred, color="red", linewidth=2, label="LWR Fit (τ=0.5)")
plt.legend()
plt.show()
#EXP-7(a): Linear Regression

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Load the Boston Housing dataset
boston_df = pd.read_csv(r"C:\Users\91943\Downloads\boston_housing_data.csv")
print("Linear Regression on Boston Housing Dataset")

# Step 2: Choose one feature for visualization
# Using 'RM' (average number of rooms) as the predictor
X = boston_df[['RM']]

# Step 3: Assign the target variable
y = boston_df['MEDV']

# Step 4: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Create and train the Linear Regression model
LR_model = LinearRegression()
LR_model.fit(X_train, y_train)

# Step 6: Make predictions
y_pred = LR_model.predict(X_test)

# Step 7: Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.4f}")
print(f"R^2 Score: {r2:.4f}")

# Step 8: Plot the results
plt.scatter(X_test, y_test, color='green', label='Actual')
plt.plot(X_test, y_pred, color='red', label='Predicted')
plt.xlabel('Average Number of Rooms (RM)')
plt.ylabel('House Price (MEDV)')
plt.title('Linear Regression on Boston Housing Dataset')
plt.legend()
plt.show()
#EXP-7(b): Polynomial Regression

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error

# Step 1: Load the Auto MPG dataset
auto_mpg = pd.read_csv(r"C:\Users\91943\Downloads\auto-mpg.csv")

# Step 2: Remove missing values and clean the 'horsepower' column
auto_mpg.dropna(inplace=True)
auto_mpg = auto_mpg[auto_mpg['horsepower'] != '?']
auto_mpg['horsepower'] = auto_mpg['horsepower'].astype(float)

# Step 3: Predictor and target
X_auto = auto_mpg[['horsepower']]
y_auto = auto_mpg['mpg']

# Step 4: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_auto, y_auto, test_size=0.2, random_state=42)

# Step 5: Create the polynomial features (degree 2)
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Step 6: Create an instance of LinearRegression and fit it to the transformed training data
poly_reg = LinearRegression()
poly_reg.fit(X_train_poly, y_train)

# Step 7: Predict the target variable (MPG) on the test data
y_pred_poly = poly_reg.predict(X_test_poly)

# Step 8: Plot the results
plt.scatter(X_test, y_test, color="blue", label="Actual Data", alpha=0.5)
plt.scatter(X_test, y_pred_poly, color="red", label="Predicted Data", alpha=0.5)
plt.xlabel("Horsepower")
plt.ylabel("MPG")
plt.title("Polynomial Regression on Auto MPG Dataset")
plt.legend()
plt.show()
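mean_squared_error is imported in this experiment but never used in the listing; a minimal sketch of the presumably intended evaluation step is:

# Optional: quantify the fit on the test set
mse_poly = mean_squared_error(y_test, y_pred_poly)
print(f"Polynomial Regression MSE: {mse_poly:.4f}")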
#EXP-8: Decision tree algorithm

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score

# Step 1: Load the dataset
data = load_breast_cancer()

# Step 2: Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)

# Step 3: Train the Decision Tree classifier
model = DecisionTreeClassifier(max_depth=4, random_state=42)
model.fit(X_train, y_train)

# Step 4: Evaluate the model
print(f"Accuracy: {accuracy_score(y_test, model.predict(X_test)):.2f}")

# Step 5: Visualize the decision tree
plot_tree(model, filled=True, feature_names=data.feature_names, class_names=data.target_names)
plt.show()

# Step 6: Classify a new sample
sample = [[15.3, 20.5, 85.2, 521, 0.1, 0.1, 0.08, 0.18, 0.19, 0.06,
           0.32, 0.43, 2.5, 20.3, 0.007, 0.018, 0.016, 0.003, 0.015, 0.002,
           17.5, 25.0, 110.5, 900, 0.14, 0.25, 0.18, 0.25, 0.27, 0.09]]

# Step 7: Print the predicted class
print("Predicted Class:", data.target_names[model.predict(sample)[0]])
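As an optional addition (not in the original listing), the fitted tree can also be printed as plain-text rules with scikit-learn's export_text, which is often easier to read than the plot for deeper trees.

# Optional: print the tree as if/else rules
from sklearn.tree import export_text
print(export_text(model, feature_names=list(data.feature_names)))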
#EXP-9: Naive Bayesian classifier

from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Step 1: Load the Olivetti face dataset
data = fetch_olivetti_faces()
X = data.data    # Each image is flattened (64x64 = 4096 pixels)
y = data.target  # Labels: person IDs (0-39)

# Step 2: Split data into training and test sets
# (roughly 7 images per person for training, 3 for testing)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Step 3: Train the Naive Bayes classifier
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Step 4: Predict on the test set
y_pred = nb_classifier.predict(X_test)

# Step 5: Print the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Naive Bayes Classifier Accuracy: {accuracy * 100:.2f}%")

# Step 6 (Optional): Show a few test faces with predictions
plt.figure(figsize=(10, 4))
for i in range(8):
    plt.subplot(2, 4, i + 1)
    plt.imshow(X_test[i].reshape(64, 64), cmap='gray')
    plt.title(f"Pred: {y_pred[i]}\nTrue: {y_test[i]}")
    plt.axis('off')
plt.tight_layout()
plt.show()
#EXP-10: k-means clustering

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report

# Step 1: Load the dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Step 2: Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 3: Perform K-Means clustering
kmeans = KMeans(n_clusters=2, random_state=42)
y_kmeans = kmeans.fit_predict(X_scaled)

# Step 4 (Optional): Print the confusion matrix and classification report
print("Confusion Matrix:")
print(confusion_matrix(y, y_kmeans))
print("\nClassification Report:")
print(classification_report(y, y_kmeans))

# Step 5: Reduce dimensions for visualization using PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Step 6: Create a DataFrame for plotting
plot_df = pd.DataFrame(X_pca, columns=['PCA1', 'PCA2'])
plot_df['Cluster'] = y_kmeans
plot_df['Actual'] = y

# Step 7: Plot the clustering result
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
sns.scatterplot(data=plot_df, x='PCA1', y='PCA2', hue='Cluster', palette='Set2')
plt.title('K-Means Clustering Result')

# Step 8: Plot the actual labels
plt.subplot(1, 2, 2)
sns.scatterplot(data=plot_df, x='PCA1', y='PCA2', hue='Actual', palette='Set1')
plt.title('Actual Cancer Types')
plt.tight_layout()
plt.show()

Note: steps marked '(Optional)' in the listings above can be skipped; they were highlighted in red in the original manual.
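One caveat with the K-Means confusion matrix and report above: cluster IDs are arbitrary, so cluster 0 does not necessarily correspond to class 0. An optional sketch (an addition, not part of the manual) flips the cluster labels when that improves agreement before re-computing the confusion matrix:

# Optional: align the arbitrary cluster IDs with the true labels before evaluating
y_aligned = y_kmeans if (y_kmeans == y).mean() >= 0.5 else 1 - y_kmeans
print("Aligned Confusion Matrix:")
print(confusion_matrix(y, y_aligned))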
