Course: AI and Blockchain in Management
Report on
Case Study: Implementing LeNet-5 for Handwritten Digit
Recognition (MNIST Dataset)
Submitted by
Name: K. Riya Sanjana
Roll No: AP23322130042
Submitted to:
Mrs. Subhashri Vasudevan
SRM University – AP, Andhra Pradesh
Code:
# Import necessary libraries
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
# Load and split the MNIST dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
# Preprocessing: Normalize and reshape images
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape((-1, 28, 28, 1)) # Reshape for CNN
x_test = x_test.reshape((-1, 28, 28, 1))
# Define the LeNet-5 architecture
model = keras.Sequential([
    # C1: Convolution layer with 6 filters of size 5x5, ReLU activation
    keras.layers.Conv2D(6, (5, 5), activation='relu', input_shape=(28, 28, 1), padding='same'),
    # S2: Average pooling layer with 2x2 pool size
    keras.layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2)),
    # C3: Convolution layer with 16 filters of size 5x5, ReLU activation
    keras.layers.Conv2D(16, (5, 5), activation='relu'),
    # S4: Average pooling layer with 2x2 pool size
    keras.layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2)),
    # C5: Convolution layer with 120 filters of size 5x5 (fully connected in effect)
    keras.layers.Conv2D(120, (5, 5), activation='relu'),
    # Flatten for fully connected layers
    keras.layers.Flatten(),
    # F6: Fully connected layer with 84 units, ReLU activation
    keras.layers.Dense(84, activation='relu'),
    # Output layer: 10 units (one for each digit), softmax activation
    keras.layers.Dense(10, activation='softmax')
])
# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Model summary
model.summary()
# Train the model
history = model.fit(x_train, y_train, epochs=10, validation_split=0.1, batch_size=128)
# Evaluate the model on the test dataset
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test Accuracy: {test_acc:.2f}')
# Plot training and validation accuracy and loss
plt.figure(figsize=(12, 5))
# Accuracy plot
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
# Loss plot
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
# Visualize predictions on random test images
num_samples = 5
indices = np.random.choice(range(len(x_test)), num_samples, replace=False)
plt.figure(figsize=(10, 5))
for i, index in enumerate(indices):
    img = x_test[index]
    prediction = np.argmax(model.predict(img.reshape(1, 28, 28, 1)))
    plt.subplot(1, num_samples, i + 1)
    plt.imshow(img.squeeze(), cmap='gray')
    plt.title(f'Predicted: {prediction}')
    plt.axis('off')
plt.show()
Output of the Code:
https://colab.research.google.com/drive/1NGsvuhjkmro_ND8CBf6HYYmA1gdL1e92?usp=sharing
Additional Questions:
1. Impact of Changing Filter Size or Number of Filters:
Altering the filter size or the number of filters in a CNN affects both its performance and its complexity. A larger filter size, such as moving from 3x3 to 5x5, allows the network to detect larger patterns in a single step, though it requires more computation per convolution. Increasing the number of filters enables the model to learn a broader range of features, which may improve accuracy but also adds to the model's size and processing time. These changes can enhance performance, but if the model becomes too complex for the task, it risks overfitting.
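To make this concrete, the following sketch (an illustration added here, not part of the submitted notebook) builds two single-convolution models that differ only in filter count and kernel size, and compares their parameter counts:
import tensorflow as tf
from tensorflow import keras

def make_conv_model(num_filters, kernel_size):
    # One convolution followed by a linear classifier; only the
    # convolution's filter count and kernel size vary.
    return keras.Sequential([
        keras.layers.Conv2D(num_filters, kernel_size, activation='relu',
                            input_shape=(28, 28, 1)),
        keras.layers.Flatten(),
        keras.layers.Dense(10, activation='softmax')
    ])

small = make_conv_model(6, (3, 3))   # fewer, smaller filters
large = make_conv_model(16, (5, 5))  # more, larger filters

# A 5x5 kernel has 25 weights per input channel versus 9 for a 3x3,
# so the second configuration is noticeably heavier.
print('6 filters, 3x3 :', small.count_params(), 'parameters')
print('16 filters, 5x5:', large.count_params(), 'parameters')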
2. Role of Pooling in Simplifying the Model:
Pooling layers, such as MaxPooling (or the AveragePooling used in LeNet-5), reduce the spatial size of the data as it passes through the network. By decreasing the number of activations, and hence the parameters and operations in subsequent layers, pooling lowers computational requirements and helps the model generalize better. This downsampling retains the essential features while discarding fine positional detail, promoting efficiency and reducing overfitting.
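As a small illustration (again a sketch of my own, not from the notebook), a single 2x2 pooling layer halves each spatial dimension, so the next layer sees four times fewer activations:
import tensorflow as tf
from tensorflow import keras

# One 2x2 average-pooling layer, as used between LeNet-5's convolutions
pool = keras.layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2))

x = tf.random.uniform((1, 28, 28, 6))  # batch of one 28x28 map with 6 channels
y = pool(x)

print('before pooling:', x.shape)  # (1, 28, 28, 6)
print('after pooling :', y.shape)  # (1, 14, 14, 6) -> 4x fewer activations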
3. Including Batch Normalization or Dropout:
Adding Batch Normalization after layers helps the model learn faster by normalizing the activations within each mini-batch, which can enhance training stability and generalization.
Dropout, as used in the given code below, deactivates a random fraction of units during training to minimize overfitting. Using batch normalization together with dropout improves training stability and robustness, potentially leading to better overall performance, especially on more complex data.
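For illustration, here is one possible way to slot both techniques into the LeNet-5 model above (a sketch of my own; the placement shown is a common choice, not the only valid one):
from tensorflow import keras

model_bn = keras.Sequential([
    # Convolution -> batch norm -> activation, a common ordering
    keras.layers.Conv2D(6, (5, 5), padding='same', input_shape=(28, 28, 1)),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.AveragePooling2D((2, 2)),
    keras.layers.Conv2D(16, (5, 5)),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.AveragePooling2D((2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(120, activation='relu'),
    keras.layers.Dropout(0.3),  # randomly drops 30% of units during training
    keras.layers.Dense(84, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])

model_bn.compile(optimizer='adam',
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'])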
Given Code:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
# Preprocessing: normalize pixel values and add a channel dimension,
# since the model below expects inputs of shape (28, 28, 1)
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))
# plot 4 images as gray scale
#plt.subplot(221)
#plt.imshow(x_train[999], cmap=plt.get_cmap('gray'))
#plt.subplot(222)
#plt.imshow(x_train[100], cmap=plt.get_cmap('gray'))
#plt.subplot(223)
#plt.imshow(x_train[10], cmap=plt.get_cmap('gray'))
#plt.subplot(224)
#plt.imshow(x_train[77], cmap=plt.get_cmap('gray'))
# show the plot
#plt.show()
# Define CNN model
model = tf.keras.Sequential([
    keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(10, activation='softmax')
])
# Compile model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.summary()
#model.count_params()
# Train model
model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))
# Evaluate model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc:.2f}')
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
img = cv2.imread('/content/sample_data/download.jpg', cv2.IMREAD_GRAYSCALE)
if img is not None:
    # Resize the image to 28x28 (the model's expected input size)
    img = cv2.resize(img, (28, 28))
    # Invert the image (MNIST digits are light on a dark background,
    # whereas downloaded/scanned digits are usually dark on white)
    img = 255 - img
    # Normalize the image
    img = img.astype('float32') / 255.0
    # Reshape the image for the model
    img = img.reshape((1, 28, 28, 1))
    # Make a prediction
    prediction = model.predict(img)
    predicted_digit = np.argmax(prediction)
    print(f'Predicted digit: {predicted_digit}')
    # Rescale to 0-255 before display, since cv2_imshow expects uint8 pixel values
    cv2_imshow((img.reshape(28, 28) * 255).astype('uint8'))
else:
    print('Error: Could not load the image.')
Output of the Code:
https://colab.research.google.com/drive/1jQGo4ByRf4qdhrxZi7ElXyu_Eacp7k0i?usp=sharing