from
keras.datasets import imdb
# Load the data, keeping only 10,000 of the most frequently occuring words
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words = 1)
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
17464789/17464789 [==============================] - 0s 0us/step
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from keras.utils import to_categorical
from keras import models
from keras import layers
# Since we restricted ourselves to the top 10000 frequent words, no word index should exceed 10000
# we'll verify this below
# Here is a list of maximum indexes in every review --- we search the maximum index in this list of
print(type([max(sequence) for sequence in train_data]))
# Find the maximum of all max indexes
max([max(sequence) for sequence in train data])
max([max(sequence) for sequence in train_data])
<class 'list'>
2
# Let's quickly decode a review
# step 1: load the dictionary mappings from word to integer index
word_index = imdb.get_word_index()
# step 2: reverse word index to map integer indexes to their respective words
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
# Step 3: decode the review, mapping integer indices to words
#
# indices are off by 3 because 0, 1, and 2 are reserverd indices for "padding", "Start of sequence"
decoded_review = ' '.join([reverse_word_index.get(i-3, '?') for i in train_data[0]])
decoded_review
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_i
1641221/1641221 [==============================] - 0s 0us/step
'? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?
import numpy as np
def vectorize_sequences(sequences, dimension=10000):
results = np.zeros((len(sequences), dimension)) # Creates an all zero matrix of shape (len(s
for i,sequence in enumerate(sequences):
results[i,sequence] = 1 # Sets specific indices of results[i] to 1s
return results
# Vectorize training Data
X_train = vectorize_sequences(train_data)
# Vectorize testing Data
X_test = vectorize_sequences(test_data)
X_train[0]
array([0., 0., 1., ..., 0., 0., 0.])
X_train.shape
(25000, 10000)
Vectorize labels
y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')
Building the Neural Network
Our input data is vectors that need to be mapped to scaler labels (0s and 1s). This is one of the easiest
setups, and a simple stack of fully-connected, Dense layers with relu activation perform quite well.
Hidden layers In this network, we will leverage hidden layers. We will define our layers as such.
from keras.models import Sequential
from keras.layers import Flatten,Dense
model=Sequential()
from keras import models
from keras import layers
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
from keras import optimizers
from keras import losses
from keras import metrics
model.compile(optimizer=optimizers.RMSprop(learning_rate=0.001),
loss = losses.binary_crossentropy,
metrics = [metrics.binary_accuracy])
Setting up Validation
# Input for Validation
X_val = X_train[:10000]
partial_X_train = X_train[10000:]
# Labels for validation
y_val = y_train[:10000]
partial_y_train = y_train[10000:]
history = model.fit(partial_X_train,
partial_y_train,
epochs=20,
batch_size=512,
validation_data=(X_val, y_val))
Epoch 1/20
30/30 [==============================] - 3s 68ms/step - loss: 0.6932 - binary_accuracy: 0.503
Epoch 2/20
30/30 [==============================] - 1s 40ms/step - loss: 0.6932 - binary_accuracy: 0.503
Epoch 3/20
30/30 [==============================] - 1s 39ms/step - loss: 0.6932 - binary_accuracy: 0.503
Epoch 4/20
30/30 [==============================] - 1s 37ms/step - loss: 0.6931 - binary_accuracy: 0.503
Epoch 5/20
30/30 [==============================] - 1s 37ms/step - loss: 0.6932 - binary_accuracy: 0.500
Epoch 6/20
30/30 [==============================] - 1s 36ms/step - loss: 0.6932 - binary_accuracy: 0.503
Epoch 7/20
30/30 [==============================] - 2s 51ms/step - loss: 0.6932 - binary_accuracy: 0.503
Epoch 8/20
30/30 [==============================] - 2s 64ms/step - loss: 0.6932 - binary_accuracy: 0.503
Epoch 9/20
30/30 [==============================] - 1s 40ms/step - loss: 0.6932 - binary_accuracy: 0.503
Epoch 10/20
30/30 [==============================] - 1s 38ms/step - loss: 0.6932 - binary_accuracy: 0.503
Epoch 11/20
30/30 [==============================] - 1s 39ms/step - loss: 0.6932 - binary_accuracy: 0.503
Epoch 12/20
30/30 [==============================] - 1s 50ms/step - loss: 0.6932 - binary_accuracy: 0.503
Epoch 13/20
30/30 [==============================] - 1s 46ms/step - loss: 0.6931 - binary_accuracy: 0.500
Epoch 14/20
30/30 [==============================] - 1s 47ms/step - loss: 0.6932 - binary_accuracy: 0.497
Epoch 15/20
30/30 [==============================] - 1s 38ms/step - loss: 0.6931 - binary_accuracy: 0.501
Epoch 16/20
30/30 [==============================] - 1s 39ms/step - loss: 0.6931 - binary_accuracy: 0.503
Epoch 17/20
30/30 [==============================] - 2s 68ms/step - loss: 0.6931 - binary_accuracy: 0.503
Epoch 18/20
30/30 [==============================] - 1s 47ms/step - loss: 0.6931 - binary_accuracy: 0.503
Epoch 19/20
30/30 [==============================] - 1s 38ms/step - loss: 0.6931 - binary_accuracy: 0.503
Epoch 20/20
30/30 [==============================] - 1s 38ms/step - loss: 0.6931 - binary_accuracy: 0.503
history_dict = history.history
history_dict.keys()
dict_keys(['loss', 'binary_accuracy', 'val_loss', 'val_binary_accuracy'])
# Making Predictions for testing data
np.set_printoptions(suppress=True)
result = model.predict(X_test)
782/782 [==============================] - 2s 3ms/step
result
array([[0.4968544],
[0.4968544],
[0.4968544],
...,
[0.4968544],
[0.4968544],
[0.4968544]], dtype=float32)
y_pred = np.zeros(len(result))
for i, score in enumerate(result):
y_pred[i] = 1 if score > 0.5 else 0
y_pred
array([0., 0., 0., ..., 0., 0., 0.])
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)
0.5
import matplotlib.pyplot as plt
import numpy
from sklearn import metrics
confusion_matrix = metrics.confusion_matrix(y_test, y_pred)
cm_display·=·metrics.ConfusionMatrixDisplay(confusion_matrix·=·confusion_matrix,·display_labels·=·[
cm_display.plot()
plt.show()