Correct The Error

This is the call I made:

enc_net = VanillaRNN(embed_dim, hidden_dim)
dec_net = VanillaRNN(embed_dim, hidden_dim)
rnn_net = RNNTranslator(eng_vocab.vocab_size, fra_vocab.vocab_size, embed_dim, hidden_dim,
                        enc_net, dec_net)
trained_rnn_net = train_model(rnn_net, train_iter, val_iter, lr, epochs, DEVICE,
                              run_name="VanillaRNN-VanillaRNN-baseline")

But I get this error:


---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[27], line 1
----> 1 trained_rnn_net = train_model(rnn_net, train_iter, val_iter, lr, epochs, DEVICE, run_name="VanillaRNN-VanillaRNN-baseline")

Cell In[23], line 52, in train_model(model, train_iter, val_iter, lr, epochs, device, run_name)
     49 optimizer.zero_grad()  # Reset gradients
     51 # Forward pass: Pass source_seq, source_lengths, target_seq[:, :-1], and target_lengths
---> 52 outputs, _ = model(source_seq, source_lengths, target_seq[:, :-1], target_lengths)  # Pass all 4 arguments
     54 # Calculate the loss, ignoring BOS token and PAD token
     55 loss = criterion(outputs.view(-1, outputs.size(-1)), target_seq[:, 1:].view(-1))  # Exclude BOS token

File ~/miniconda3/envs/ddpm/lib/python3.10/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/ddpm/lib/python3.10/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

Cell In[20], line 46, in RNNTranslator.forward(self, source_seq, source_lengths, target_seq, target_lengths)
     42 initial_hidden, initial_cell = self.encoder(source_seq, source_lengths)  # Encoder returns hidden and cell states
     44 # Step 2: Decode the target sequence using the decoder
     45 # During training, we use teacher forcing (target_seq is available)
---> 46 logits, _ = self.decoder(initial_hidden, initial_cell, target_seq)
     48 ####################################################################################
     49 # END OF YOUR CODE
     50 ####################################################################################
     52 return logits

File ~/miniconda3/envs/ddpm/lib/python3.10/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/ddpm/lib/python3.10/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

Cell In[19], line 41, in RNNDecoder.forward(self, initial_hidden, initial_cell, target_seq)
     39 for t in range(seq_length):
     40     input_t = self.embedding(target_seq[:, t])  # Shape: (batch_size, embed_dim)
---> 41     hidden_state, _ = self.rnn(input_t.unsqueeze(1), hidden_state)  # Forward through RNN
     42     output_t = self.custom_linear(hidden_state)  # Compute the output
     43     outputs.append(output_t)

File ~/miniconda3/envs/ddpm/lib/python3.10/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/ddpm/lib/python3.10/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

Cell In[13], line 74, in VanillaRNN.forward(self, x, h_0, valid_len, **kwargs)
     67 h_t = self.rnn_cell(x_t, h_t)
     69 # Ensure h_t has the correct shape
     70 # Add these lines before the assignment
     71 # print(f"Shape of h_t at time step {t}: {h_t.shape}")
     72 # print(f"Shape of h_seq before assignment: {h_seq[:, t, :].shape}")
---> 74 h_seq[:, t, :] = h_t
     76 # If valid_len is provided, apply masking
     77 if valid_len is not None:
     78     # Create a mask for the current time step

RuntimeError: expand(torch.cuda.FloatTensor{[64, 64, 512]}, size=[64, 512]): the number of sizes provided (2) must be greater or equal to the number of dimensions in the tensor (3)
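
To make the failure easier to read: the assignment h_seq[:, t, :] = h_t expects a 2-D tensor of shape (batch_size, hidden_size), but h_t has somehow become 3-D. A minimal sketch that reproduces the same RuntimeError, with hypothetical sizes matching the traceback (batch 64, hidden 512):

import torch

h_seq = torch.zeros(64, 10, 512)  # (batch_size, seq_length, hidden_size)
h_t = torch.zeros(64, 64, 512)    # 3-D instead of the expected (64, 512)
h_seq[:, 0, :] = h_t              # RuntimeError: expand(...{[64, 64, 512]}, size=[64, 512])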

My VanillaRNN code:

class VanillaRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        """
        Constructor for the VanillaRNN class.

        Args:
            input_size (int): The size (number of features) of the input data at each time step.
            hidden_size (int): The size (number of features) of the hidden state.

        Description:
            Initializes the VanillaRNN model, which processes sequential input data across
            multiple time steps. This class uses the previously defined VanillaRNNCell to
            process one time step at a time. The RNN maintains a hidden state of size
            `hidden_size` throughout the sequence.
        """
        super(VanillaRNN, self).__init__()

        ####################################################################################
        # TODO: Implement the VanillaRNN constructor
        ####################################################################################
        # Your implementation code
        # Initialize the VanillaRNNCell with input and hidden sizes
        self.rnn_cell = VanillaRNNCell(input_size, hidden_size)
        self.hidden_size = hidden_size
        ####################################################################################
        # END OF YOUR CODE
        ####################################################################################

    def forward(self, x, h_0=None, valid_len=None, **kwargs):
        """
        Forward pass for the entire sequence.

        Args:
            x (Tensor): Input sequence of shape (batch_size, seq_length, input_size).
            h_0 (Tensor, optional): Initial hidden state of shape (batch_size, hidden_size).
            valid_len (Tensor): Tensor of shape (batch_size,) containing the lengths of
                sequences before padding.

        Returns:
            h_seq (Tensor): Hidden states at each time step, of shape
                (batch_size, seq_length, hidden_size).
            h_t (Tensor): The hidden state at the last time step, of shape
                (batch_size, hidden_size).
        """
        batch_size, seq_length, _ = x.size()

        ####################################################################################
        # TODO: Implement the VanillaRNN forward pass. If h_0 is None, then the initial
        # hidden state should be initialized to a zero tensor. Also, implement the masking
        # based on the given valid_len if it is not None.
        ####################################################################################
        # Your implementation code

        # If h_0 is None, initialize it as a zero tensor of shape (batch_size, hidden_size)
        if h_0 is None:
            h_0 = torch.zeros(batch_size, self.hidden_size).to(x.device)

        # Initialize a tensor to hold all hidden states across the sequence
        h_seq = torch.zeros(batch_size, seq_length, self.hidden_size).to(x.device)

        # Set the initial hidden state
        h_t = h_0

        # Iterate over each time step in the sequence
        for t in range(seq_length):
            # Get the input at time step t
            x_t = x[:, t, :]

            # Perform the forward pass using VanillaRNNCell for time step t
            h_t = self.rnn_cell(x_t, h_t)

            # Debug prints left from earlier troubleshooting:
            # print(f"Shape of h_t at time step {t}: {h_t.shape}")
            # print(f"Shape of h_seq before assignment: {h_seq[:, t, :].shape}")
            h_seq[:, t, :] = h_t

            # If valid_len is provided, apply masking
            if valid_len is not None:
                # Create a mask for the current time step
                mask = (valid_len > t).float().unsqueeze(1)
                # Update h_t based on the mask
                h_t = h_t * mask + h_0 * (1 - mask)  # Reset to h_0 for invalid lengths
        ####################################################################################
        # END OF YOUR CODE
        ####################################################################################
        return h_seq, h_t
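
Note the return order: VanillaRNN returns the full per-step sequence first and the last hidden state second. A quick shape check (a sketch with hypothetical sizes, assuming torch is imported and VanillaRNNCell is defined as earlier in the assignment):

rnn = VanillaRNN(input_size=256, hidden_size=512)
x = torch.randn(64, 10, 256)  # (batch_size, seq_length, input_size)
h_seq, h_t = rnn(x)
print(h_seq.shape)  # torch.Size([64, 10, 512]) -- all hidden states
print(h_t.shape)    # torch.Size([64, 512])     -- last hidden state only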
My RNNTranslator code:
class RNNTranslator(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, embedding_dim, hidden_size, enc_net, dec_net):
        super(RNNTranslator, self).__init__()

        ####################################################################################
        # TODO: Implement the RNNTranslator constructor
        ####################################################################################
        # Your implementation code
        # Encoder: takes the source vocab size and embedding dim, uses the custom RNN net
        self.encoder = RNNEncoder(src_vocab_size, embedding_dim, hidden_size, enc_net)

        # Decoder: takes the target vocab size and uses the custom RNN net
        self.decoder = RNNDecoder(tgt_vocab_size, embedding_dim, hidden_size, dec_net)
        ####################################################################################
        # END OF YOUR CODE
        ####################################################################################

    def forward(self, source_seq, source_lengths, target_seq, target_lengths):
        """
        Args:
            source_seq (Tensor): Source sequences (batch_size, source_sequence_length).
            source_lengths (Tensor): Lengths of source sequences before padding (batch_size,).
            target_seq (Tensor): Target sequences (batch_size, target_sequence_length).
            target_lengths (Tensor): Lengths of target sequences before padding (batch_size,).
        Returns:
            logits (Tensor): Logits of shape (batch_size, tgt_seq_len, tgt_vocab_size),
                representing predicted token scores.
        """
        batch_size = source_seq.size(0)

        ####################################################################################
        # TODO: Implement the RNNTranslator forward pass by running the forward pass of
        # the Encoder followed by the Decoder.
        ####################################################################################
        # Your implementation code

        # Step 1: Encode the source sequence using the encoder
        initial_hidden, initial_cell = self.encoder(source_seq, source_lengths)  # Encoder returns hidden and cell states

        # Step 2: Decode the target sequence using the decoder
        # During training, we use teacher forcing (target_seq is available)
        logits, _ = self.decoder(initial_hidden, initial_cell, target_seq)
        ####################################################################################
        # END OF YOUR CODE
        ####################################################################################

        return logits

    def predict(self, source_seq, source_lengths):
        """
        Generates translations for the given source sequences.
        Args:
            source_seq (Tensor): Source sequences (batch_size, source_sequence_length).
            source_lengths (Tensor): Lengths of source sequences before padding (batch_size,).
        Returns:
            predicted_tokens (Tensor): Generated token indices (batch_size, max_length).
        """
        batch_size = source_seq.size(0)

        ####################################################################################
        # TODO: Implement the RNNTranslator prediction function. It should be similar to
        # the forward pass but not the same. Note that you should return predicted
        # tokens, not logits.
        ####################################################################################
        # Your implementation code

        # Step 1: Encode the source sequence using the encoder
        initial_hidden, initial_cell = self.encoder(source_seq, source_lengths)

        # Step 2: Generate the target sequence using the decoder (greedy decoding for inference)
        predicted_tokens, _ = self.decoder(initial_hidden, initial_cell, target_seq=None,
                                           max_target_length=max_target_length)
        ####################################################################################
        # END OF YOUR CODE
        ####################################################################################
        return predicted_tokens
My encoder code:

class RNNEncoder(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, rnn_net):
        super(RNNEncoder, self).__init__()
        """
        Args:
            vocab_size (int): Number of unique words in the source vocabulary.
            embed_dim (int): Dimension of the word embeddings.
            hidden_dim (int): Dimension of the hidden state in the RNN.
            rnn_net: Any of the RNN variants already implemented.
        """

        ####################################################################################
        # TODO: Implement the RNNEncoder constructor. You can use `nn.Embedding` for the
        # embedding layer.
        ####################################################################################
        # Your implementation code
        # Embedding layer: maps word indices to dense vectors
        self.embedding = nn.Embedding(vocab_size, embed_dim)

        # RNN layer: can be a vanilla RNN, LSTM, or GRU
        self.rnn_net = rnn_net
        self.hidden_dim = hidden_dim
        ####################################################################################
        # END OF YOUR CODE
        ####################################################################################

    def forward(self, input_seq, valid_len):
        """
        Args:
            input_seq (Tensor): Tensor of shape (batch_size, sequence_length) containing word indices.
            valid_len (Tensor): Tensor of shape (batch_size,) containing the lengths of sequences
                before padding.
        Returns:
            final_hidden (Tensor): Tensor of shape (batch_size, hidden_dim) representing the
                final hidden states.
            final_cell (Tensor or None): Either
                1. a tensor of shape (batch_size, hidden_dim) representing the final cell states
                   if rnn_net is an LSTM, or
                2. None if rnn_net is not an LSTM.
        """

        ####################################################################################
        # TODO: Implement the RNNEncoder forward pass. You must embed the input sequence
        # before putting it into the RNN layer. Remember, the output of the RNN layer can
        # differ depending on the type of RNN. Your code must be able to handle all of the
        # implemented RNN variants.
        ####################################################################################
        # Your implementation code
        # Get the batch size and sequence length
        batch_size, seq_length = input_seq.size()

        # Embed the input sequence (batch_size, seq_length, embed_dim)
        embedded_seq = self.embedding(input_seq)

        # Forward pass through the RNN
        if isinstance(self.rnn_net, nn.LSTM):
            # LSTM returns both hidden and cell states
            rnn_output, (final_hidden, final_cell) = self.rnn_net(embedded_seq, valid_len=valid_len)
        else:
            # GRU / vanilla RNN only returns the hidden state
            rnn_output, final_hidden = self.rnn_net(embedded_seq, valid_len=valid_len)
            final_cell = None  # No cell state for non-LSTM RNNs
        ####################################################################################
        # END OF YOUR CODE
        ####################################################################################

        return final_hidden, final_cell
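
Since the VanillaRNN above is a custom module (not an nn.LSTM), the else branch runs, and final_hidden is the 2-D last hidden state from the (h_seq, h_t) pair, which is what the decoder expects as initial_hidden. A quick check (a sketch with hypothetical sizes):

encoder = RNNEncoder(vocab_size=10000, embed_dim=256, hidden_dim=512,
                     rnn_net=VanillaRNN(256, 512))
src = torch.randint(0, 10000, (64, 10))  # (batch_size, seq_length) of word indices
lengths = torch.full((64,), 10)          # all sequences fully valid here
final_hidden, final_cell = encoder(src, lengths)
print(final_hidden.shape, final_cell)    # torch.Size([64, 512]) None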

My decoder code:

class RNNDecoder(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, rnn_net):
        super(RNNDecoder, self).__init__()
        """
        Args:
            vocab_size (int): Number of unique words in the target vocabulary.
            embed_dim (int): Dimension of the word embeddings.
            hidden_dim (int): Dimension of the hidden state in the RNN.
        """
        self.embedding = nn.Embedding(vocab_size, embed_dim)  # Embedding layer
        self.rnn = rnn_net
        self.output_W = nn.Parameter(torch.randn(hidden_dim, vocab_size))  # Custom linear layer weights
        self.output_b = nn.Parameter(torch.zeros(vocab_size))  # Custom linear layer bias

    def custom_linear(self, hidden_state):
        # Ensure the hidden state is 2D for matrix multiplication
        return hidden_state @ self.output_W + self.output_b

    def forward(self, initial_hidden, initial_cell=None, target_seq=None):
        """
        Args:
            initial_hidden (Tensor): Tensor of shape (batch_size, hidden_dim).
            initial_cell (Tensor or None): Either a tensor of shape (batch_size, hidden_dim)
                for LSTM or None for RNN.
            target_seq (Tensor): Tensor of shape (batch_size, target_sequence_length)
                containing word indices.
        Returns:
            outputs (Tensor): Tensor of shape (batch_size, target_sequence_length, vocab_size)
                with raw scores.
            hidden_state (Tensor): Tensor containing the final hidden state.
        """
        batch_size = initial_hidden.size(0)
        if target_seq is not None:
            # Teacher-forcing mode
            seq_length = target_seq.size(1)
            outputs = []
            hidden_state = initial_hidden

            for t in range(seq_length):
                input_t = self.embedding(target_seq[:, t])  # Shape: (batch_size, embed_dim)
                hidden_state, _ = self.rnn(input_t.unsqueeze(1), hidden_state)  # Forward through RNN
                output_t = self.custom_linear(hidden_state)  # Compute the output
                outputs.append(output_t)
            outputs = torch.stack(outputs, dim=1)  # Shape: (batch_size, seq_length, vocab_size)
            return outputs, hidden_state
        else:
            # Inference mode
            outputs = []
            input_t = torch.full((batch_size,), BOS_TOKEN, dtype=torch.long,
                                 device=initial_hidden.device)  # Start with the BOS token
            hidden_state = initial_hidden
            for _ in range(max_target_length):
                input_t_embedded = self.embedding(input_t)  # Shape: (batch_size, embed_dim)
                hidden_state, _ = self.rnn(input_t_embedded.unsqueeze(1), hidden_state)  # Forward through RNN
                output_t = self.custom_linear(hidden_state)  # Compute the output
                outputs.append(output_t)
                # Greedy decoding: use the predicted token as the next input
                input_t = output_t.argmax(dim=1)  # Get the predicted token (batch_size,)
            outputs = torch.stack(outputs, dim=1)  # Shape: (batch_size, max_target_length, vocab_size)
            return outputs, hidden_state
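
One thing worth noticing in the teacher-forcing loop above: hidden_state, _ = self.rnn(...) keeps the first return value, but with the VanillaRNN shown earlier the first value is the 3-D h_seq, not the 2-D last hidden state, so the next iteration feeds a 3-D h_0 into the cell. A hedged sketch of the same loop with the unpacking swapped (assuming the (h_seq, h_t) return order above):

for t in range(seq_length):
    input_t = self.embedding(target_seq[:, t])  # (batch_size, embed_dim)
    # Keep the LAST hidden state (2-D), not the per-step sequence (3-D)
    _, hidden_state = self.rnn(input_t.unsqueeze(1), hidden_state)
    output_t = self.custom_linear(hidden_state)  # (batch_size, vocab_size)
    outputs.append(output_t)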

My train_model code:

import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm


def train_model(model, train_iter, val_iter, lr, epochs, device, run_name="experiment-1"):
    """
    Trains a Translator model (RNN or Transformer-based).
    Args:
        model (nn.Module): The Translator model to train.
        train_iter (DataLoader): Training data iterator.
        val_iter (DataLoader): Validation data iterator.
        lr (float): Learning rate.
        epochs (int): Number of epochs to train.
        device (device): Device used to train.
        run_name (str): Run name to log on TensorBoard.
    """
    model.to(device)
    #####################################################################
    # TODO:
    # Setup the optimizer and loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    PAD_TOKEN = 0  # Assuming 0 is the PAD token, adjust if necessary
    BOS_TOKEN = 1  # Assuming 1 is the BOS token, adjust if necessary
    criterion = nn.CrossEntropyLoss(ignore_index=PAD_TOKEN)  # Ignore padding tokens

    # Set up TensorBoard writer
    writer = SummaryWriter(log_dir=f'runs/{run_name}')

    for epoch in range(1, epochs + 1):
        # Training phase
        model.train()
        total_train_loss = 0
        total_train_batches = 0

        # Iterate over training batches
        for _, train_data in tqdm(enumerate(train_iter), desc=f"Epoch {epoch} Training"):
            # Unpack the training data
            source_seq, source_lengths, target_seq, target_lengths = train_data

            # Move tensors to the appropriate device only if necessary
            source_seq = source_seq.to(device) if not source_seq.is_cuda else source_seq
            source_lengths = source_lengths.to(device) if not source_lengths.is_cuda else source_lengths
            target_seq = target_seq.to(device) if not target_seq.is_cuda else target_seq
            target_lengths = target_lengths.to(device) if not target_lengths.is_cuda else target_lengths

            optimizer.zero_grad()  # Reset gradients

            # Forward pass: pass source_seq, source_lengths, target_seq[:, :-1], and target_lengths
            outputs, _ = model(source_seq, source_lengths, target_seq[:, :-1], target_lengths)  # Pass all 4 arguments

            # Calculate the loss, ignoring BOS token and PAD token
            loss = criterion(outputs.view(-1, outputs.size(-1)), target_seq[:, 1:].view(-1))  # Exclude BOS token

            # Backpropagation
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()
            total_train_batches += 1
        average_train_loss = total_train_loss / total_train_batches

        # Validation phase
        model.eval()
        total_val_loss = 0
        total_val_batches = 0
        with torch.no_grad():
            for _, val_data in tqdm(enumerate(val_iter), desc=f"Epoch {epoch} Validation"):
                source_seq, source_lengths, target_seq, target_lengths = val_data

                # Move tensors to the appropriate device only if necessary
                source_seq = source_seq.to(device) if not source_seq.is_cuda else source_seq
                source_lengths = source_lengths.to(device) if not source_lengths.is_cuda else source_lengths
                target_seq = target_seq.to(device) if not target_seq.is_cuda else target_seq
                target_lengths = target_lengths.to(device) if not target_lengths.is_cuda else target_lengths

                # Forward pass (excluding the last token of the input)
                outputs, _ = model(source_seq, source_lengths, target_seq[:, :-1], target_lengths)

                # Calculate the validation loss, ignoring BOS and PAD tokens
                loss = criterion(outputs.view(-1, outputs.size(-1)), target_seq[:, 1:].view(-1))
                total_val_loss += loss.item()
                total_val_batches += 1
        average_val_loss = total_val_loss / total_val_batches
        # End of TODO #######################################################

        # Log to TensorBoard
        writer.add_scalar('Loss/Train', average_train_loss, epoch)
        writer.add_scalar('Loss/Validation', average_val_loss, epoch)

        # Print epoch summary
        print(f"""Epoch {epoch}/{epochs}
        Train Loss: {average_train_loss:.4f}
        Validation Loss: {average_val_loss:.4f}\n""")

    # Close the TensorBoard writer
    writer.close()
    return model
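
For reference, nn.CrossEntropyLoss with ignore_index simply drops the PAD positions from the averaged loss; a small self-contained sketch with hypothetical values:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss(ignore_index=0)  # 0 = PAD
logits = torch.randn(3, 5)                       # 3 token positions, vocab size 5
targets = torch.tensor([2, 0, 4])                # position 1 is PAD and is ignored
loss = criterion(logits, targets)                # averaged over the 2 non-PAD positions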

Correct the code that causes the problem, only in the TODO section.
