This is a companion notebook for the book Deep Learning with Python, Second Edition.
For
readability, it only contains runnable code blocks and section titles, and omits everything
else in the book: text paragraphs, figures, and pseudocode.
If you want to be able to follow what's going on, I recommend reading the notebook
side by side with your copy of the book.
This notebook was generated for TensorFlow 2.6.
The mathematical building blocks of neural networks
A first look at a neural network
Loading the MNIST dataset in Keras
from tensorflow.keras.datasets import mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images.shape
len(train_labels)
train_labels
test_images.shape
len(test_labels)
test_labels
The network architecture
from tensorflow import keras
from tensorflow.keras import layers
model = keras.Sequential([
    layers.Dense(512, activation="relu"),
    layers.Dense(10, activation="softmax")
])
The compilation step
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
Preparing the image data
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype("float32") / 255
"Fitting" the model
model.fit(train_images, train_labels, epochs=5, batch_size=128)
Using the model to make predictions
test_digits = test_images[0:10]
predictions = model.predict(test_digits)
predictions[0]
predictions[0].argmax()
predictions[0][7]
test_labels[0]
Evaluating the model on new data
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f"test_acc: {test_acc}")
Data representations for neural networks
Scalars (rank-0 tensors)
import numpy as np
x = np.array(12)
x
x.ndim
Vectors (rank-1 tensors)
x = np.array([12, 3, 6, 14, 7])
x
x.ndim
Matrices (rank-2 tensors)
x = np.array([[5, 78, 2, 34, 0],
              [6, 79, 3, 35, 1],
              [7, 80, 4, 36, 2]])
x.ndim
Rank-3 and higher-rank tensors
x = np.array([[[5, 78, 2, 34, 0],
               [6, 79, 3, 35, 1],
               [7, 80, 4, 36, 2]],
              [[5, 78, 2, 34, 0],
               [6, 79, 3, 35, 1],
               [7, 80, 4, 36, 2]],
              [[5, 78, 2, 34, 0],
               [6, 79, 3, 35, 1],
               [7, 80, 4, 36, 2]]])
x.ndim
Key attributes
from tensorflow.keras.datasets import mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images.ndim
train_images.shape
train_images.dtype
Displaying the fourth digit
import matplotlib.pyplot as plt
digit = train_images[4]
plt.imshow(digit, cmap=plt.cm.binary)
plt.show()
train_labels[4]
Manipulating tensors in NumPy
my_slice = train_images[10:100]
my_slice.shape
my_slice = train_images[10:100, :, :]
my_slice.shape
my_slice = train_images[10:100, 0:28, 0:28]
my_slice.shape
my_slice = train_images[:, 14:, 14:]
my_slice = train_images[:, 7:-7, 7:-7]
The notion of data batches
batch = train_images[:128]
batch = train_images[128:256]
n = 3
batch = train_images[128 * n:128 * (n + 1)]
Real-world examples of data tensors
Vector data
Timeseries data or sequence data
Image data
Video data
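The book covers these four categories in text only. As a small illustrative sketch (not part of the original notebook), here are placeholder tensors with the typical shapes the book describes; the exact sizes are arbitrary examples:

import numpy as np

# Vector data: (samples, features)
vector_data = np.zeros((500, 3))
# Timeseries or sequence data: (samples, timesteps, features)
timeseries_data = np.zeros((250, 390, 3))
# Image data: (samples, height, width, channels)
image_data = np.zeros((128, 256, 256, 3))
# Video data: (samples, frames, height, width, channels)
video_data = np.zeros((4, 240, 144, 256, 3))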
The gears of neural networks: tensor operations
Element-wise operations
def naive_relu(x):
    assert len(x.shape) == 2  # x is a rank-2 NumPy tensor
    x = x.copy()              # avoid overwriting the input tensor
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            x[i, j] = max(x[i, j], 0)
    return x

def naive_add(x, y):
    assert len(x.shape) == 2  # x and y are rank-2 NumPy tensors
    assert x.shape == y.shape
    x = x.copy()              # avoid overwriting the input tensor
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            x[i, j] += y[i, j]
    return x
import time
x = np.random.random((20, 100))
y = np.random.random((20, 100))

t0 = time.time()
for _ in range(1000):
    z = x + y
    z = np.maximum(z, 0.)
print("Took: {0:.2f} s".format(time.time() - t0))

t0 = time.time()
for _ in range(1000):
    z = naive_add(x, y)
    z = naive_relu(z)
print("Took: {0:.2f} s".format(time.time() - t0))
Broadcasting
import numpy as np
X = np.random.random((32, 10))
y = np.random.random((10,))
y = np.expand_dims(y, axis=0)
Y = np.concatenate([y] * 32, axis=0)
def naive_add_matrix_and_vector(x, y):
    assert len(x.shape) == 2  # x is a rank-2 NumPy tensor (a matrix)
    assert len(y.shape) == 1  # y is a rank-1 NumPy tensor (a vector)
    assert x.shape[1] == y.shape[0]
    x = x.copy()              # avoid overwriting the input tensor
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            x[i, j] += y[j]
    return x
import numpy as np
x = np.random.random((64, 3, 32, 10))
y = np.random.random((32, 10))
z = np.maximum(x, y)
Tensor product
x = np.random.random((32,))
y = np.random.random((32,))
z = np.dot(x, y)
def naive_vector_dot(x, y):
    assert len(x.shape) == 1  # x and y are rank-1 NumPy tensors
    assert len(y.shape) == 1
    assert x.shape[0] == y.shape[0]
    z = 0.
    for i in range(x.shape[0]):
        z += x[i] * y[i]
    return z
def naive_matrix_vector_dot(x, y):
    assert len(x.shape) == 2  # x is a matrix, y is a vector
    assert len(y.shape) == 1
    assert x.shape[1] == y.shape[0]
    z = np.zeros(x.shape[0])
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            z[i] += x[i, j] * y[j]
    return z

def naive_matrix_vector_dot(x, y):
    z = np.zeros(x.shape[0])
    for i in range(x.shape[0]):
        z[i] = naive_vector_dot(x[i, :], y)
    return z
def naive_matrix_dot(x, y):
    assert len(x.shape) == 2  # x and y are matrices
    assert len(y.shape) == 2
    assert x.shape[1] == y.shape[0]
    z = np.zeros((x.shape[0], y.shape[1]))
    for i in range(x.shape[0]):
        for j in range(y.shape[1]):
            row_x = x[i, :]
            column_y = y[:, j]
            z[i, j] = naive_vector_dot(row_x, column_y)
    return z
Tensor reshaping
train_images = train_images.reshape((60000, 28 * 28))
x = np.array([[0., 1.],
              [2., 3.],
              [4., 5.]])
x.shape
x = x.reshape((6, 1))
x
x = np.zeros((300, 20))
x = np.transpose(x)
x.shape
Geometric interpretation of tensor operations
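This section is figures and text only in the book. As a quick hedged sketch (my addition, not an original cell), rotating a 2D point by an angle theta is simply a dot product with a 2x2 rotation matrix:

import numpy as np

theta = np.pi / 4  # 45-degree rotation
R = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta),  np.cos(theta)]])
point = np.array([1., 0.])
rotated_point = np.dot(R, point)  # approximately [0.707, 0.707]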
A geometric interpretation of deep learning
The engine of neural networks: gradient-based optimization
What's a derivative?
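The book discusses derivatives in prose only. As an illustrative sketch (my addition), the derivative of a function at a point can be approximated numerically by looking at how the output changes for a tiny step epsilon:

def approximate_derivative(f, x, epsilon=1e-6):
    # Finite-difference approximation of f'(x)
    return (f(x + epsilon) - f(x)) / epsilon

f = lambda x: x ** 2
approximate_derivative(f, 3.)  # close to 6.0, since f'(x) = 2 * x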
Derivative of a tensor operation: the gradient
Stochastic gradient descent
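The mini-batch SGD loop appears only as pseudocode in the book, which this notebook omits. Below is a minimal runnable sketch of the naive update rule on a single scalar variable (my own illustration under that assumption, not the book's pseudocode):

import tensorflow as tf

x = tf.Variable(10.)    # parameter to optimize
learning_rate = 0.1
for step in range(50):
    with tf.GradientTape() as tape:
        loss = (x - 3.) ** 2        # toy loss, minimized at x == 3
    gradient = tape.gradient(loss, x)
    x.assign_sub(learning_rate * gradient)  # naive SGD step: x -= lr * grad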
Chaining derivatives: The Backpropagation algorithm
The chain rule
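This subsection is also text-only. A tiny numeric check of the chain rule, grad(f(g(x))) = f'(g(x)) * g'(x), as a sketch of my own:

def g(x):
    return 3 * x + 2   # g'(x) = 3

def f(x):
    return x ** 2      # f'(x) = 2 * x

x = 1.5
# Derivative of f(g(x)) with respect to x, by the chain rule:
grad_fg = 2 * g(x) * 3  # f'(g(x)) * g'(x) == 39.0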
Automatic differentiation with computation graphs
The gradient tape in TensorFlow
import tensorflow as tf
x = tf.Variable(0.)
with tf.GradientTape() as tape:
    y = 2 * x + 3
grad_of_y_wrt_x = tape.gradient(y, x)
x = tf.Variable(tf.random.uniform((2, 2)))
with tf.GradientTape() as tape:
    y = 2 * x + 3
grad_of_y_wrt_x = tape.gradient(y, x)
W = tf.Variable(tf.random.uniform((2, 2)))
b = tf.Variable(tf.zeros((2,)))
x = tf.random.uniform((2, 2))
with tf.GradientTape() as tape:
    y = tf.matmul(x, W) + b
grad_of_y_wrt_W_and_b = tape.gradient(y, [W, b])
Looking back at our first example
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype("float32") / 255
model = keras.Sequential([
    layers.Dense(512, activation="relu"),
    layers.Dense(10, activation="softmax")
])
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
model.fit(train_images, train_labels, epochs=5, batch_size=128)
Reimplementing our first example from scratch in TensorFlow
A simple Dense class
import tensorflow as tf
class NaiveDense:
    def __init__(self, input_size, output_size, activation):
        self.activation = activation
        # Create a matrix W of shape (input_size, output_size),
        # initialized with small random values.
        w_shape = (input_size, output_size)
        w_initial_value = tf.random.uniform(w_shape, minval=0, maxval=1e-1)
        self.W = tf.Variable(w_initial_value)
        # Create a vector b of shape (output_size,), initialized with zeros.
        b_shape = (output_size,)
        b_initial_value = tf.zeros(b_shape)
        self.b = tf.Variable(b_initial_value)

    def __call__(self, inputs):
        # Apply the forward pass.
        return self.activation(tf.matmul(inputs, self.W) + self.b)

    @property
    def weights(self):
        # Convenience method for retrieving the layer's weights.
        return [self.W, self.b]
A simple Sequential class
class NaiveSequential:
    def __init__(self, layers):
        self.layers = layers

    def __call__(self, inputs):
        # Call the layers in order on the inputs.
        x = inputs
        for layer in self.layers:
            x = layer(x)
        return x

    @property
    def weights(self):
        # Collect the weights of all layers.
        weights = []
        for layer in self.layers:
            weights += layer.weights
        return weights
model = NaiveSequential([
    NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
    NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax)
])
assert len(model.weights) == 4
A batch generator
import math
class BatchGenerator:
    def __init__(self, images, labels, batch_size=128):
        assert len(images) == len(labels)
        self.index = 0
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        # Return the next batch and advance the index.
        images = self.images[self.index : self.index + self.batch_size]
        labels = self.labels[self.index : self.index + self.batch_size]
        self.index += self.batch_size
        return images, labels
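A quick sanity check of the generator (my addition, assuming the reshaped MNIST arrays from the earlier cells are still in memory):

generator = BatchGenerator(train_images, train_labels, batch_size=128)
images_batch, labels_batch = generator.next()
images_batch.shape  # (128, 784)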
Running one training step
def one_training_step(model, images_batch, labels_batch):
    # Run the forward pass (compute predictions and the loss)
    # inside a GradientTape scope.
    with tf.GradientTape() as tape:
        predictions = model(images_batch)
        per_sample_losses = tf.keras.losses.sparse_categorical_crossentropy(
            labels_batch, predictions)
        average_loss = tf.reduce_mean(per_sample_losses)
    # Compute the gradient of the loss with regard to the weights.
    gradients = tape.gradient(average_loss, model.weights)
    # Update the weights using the gradients.
    update_weights(gradients, model.weights)
    return average_loss
learning_rate = 1e-3

def update_weights(gradients, weights):
    for g, w in zip(gradients, weights):
        w.assign_sub(g * learning_rate)
from tensorflow.keras import optimizers

optimizer = optimizers.SGD(learning_rate=1e-3)

def update_weights(gradients, weights):
    optimizer.apply_gradients(zip(gradients, weights))
The full training loop
def fit(model, images, labels, epochs, batch_size=128):
    for epoch_counter in range(epochs):
        print(f"Epoch {epoch_counter}")
        batch_generator = BatchGenerator(images, labels, batch_size)
        for batch_counter in range(batch_generator.num_batches):
            images_batch, labels_batch = batch_generator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print(f"loss at batch {batch_counter}: {loss:.2f}")
from tensorflow.keras.datasets import mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype("float32") / 255
fit(model, train_images, train_labels, epochs=10, batch_size=128)
Evaluating the model
predictions = model(test_images)
predictions = predictions.numpy()
predicted_labels = np.argmax(predictions, axis=1)
matches = predicted_labels == test_labels
print(f"accuracy: {matches.mean():.2f}")
Summary