Config.py
from pathlib import Path
# create the configuration
def get_config():
    return {
        "batch_size": 8,
        "num_epochs": 20,
        "lr": 10**-4,
        "seq_len": 350,
        "d_model": 512,
        "lang_src": "en",
        "lang_tgt": "it",
        "model_folder": "weights",
        "model_basename": "tmodel_",
        "preload": None,
        "tokenizer_file": "tokenizer_{0}.json",
        "experiment_name": "runs/tmodel_"
    }
# build the path of the weights file for a given epoch
def get_weights_file_path(config, epoch: str):
model_folder = config["model_folder"]
model_basename = config["model_basename"]
model_filename = f"{model_basename}{epoch}.pt"
return str(Path('.') / model_folder / model_filename)
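For reference, a quick sketch of how these two helpers fit together (the epoch string "05" is just an illustrative value):

from config import get_config, get_weights_file_path

config = get_config()
print(get_weights_file_path(config, "05"))  # weights/tmodel_05.pt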
Corpus.py
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from typing import Any
class BillingualDataset(Dataset):
def __init__(self, dataset, tokenizer_src, tokenizer_tgt, src_lang, tgt_lang, seq_len):
super().__init__()
self.dataset = dataset
self.tokenizer_src = tokenizer_src
self.tokenizer_tgt = tokenizer_tgt
self.src_lang = src_lang
self.tgt_lang = tgt_lang
self.seq_len = seq_len
self.sos_token = torch.tensor([
tokenizer_src.token_to_id('[SOS]')
], dtype=torch.int64)
self.eos_token = torch.tensor([
tokenizer_src.token_to_id('[EOS]')
], dtype=torch.int64)
self.pad_token = torch.tensor([
tokenizer_src.token_to_id('[PAD]')
], dtype=torch.int64)
def __len__(self):
return len(self.dataset)
def __getitem__(self, index) -> Any:
src_target_pair = self.dataset[index]
src_text = src_target_pair["translation"][self.src_lang]
tgt_text = src_target_pair["translation"][self.tgt_lang]
enc_input_tokens = self.tokenizer_src.encode(src_text).ids
dec_input_tokens = self.tokenizer_tgt.encode(tgt_text).ids
enc_num_padding_tokens = self.seq_len - len(enc_input_tokens) - 2
dec_num_padding_tokens = self.seq_len - len(dec_input_tokens) - 1
if enc_num_padding_tokens < 0 or dec_num_padding_tokens < 0:
raise ValueError("Sentence is too long")
# add SOS and EOS to the source text
encoder_input = torch.cat(
    [
        self.sos_token,
        torch.tensor(enc_input_tokens, dtype=torch.int64),
        self.eos_token,
        torch.tensor([self.pad_token] * enc_num_padding_tokens, dtype=torch.int64)
    ]
)
# add SOS to the decoder input
decoder_input = torch.cat(
    [
        self.sos_token,
        torch.tensor(dec_input_tokens, dtype=torch.int64),
        torch.tensor([self.pad_token] * dec_num_padding_tokens, dtype=torch.int64)
    ]
)
# add EOS to the label
label = torch.cat(
    [
        torch.tensor(dec_input_tokens, dtype=torch.int64),
        self.eos_token,
        torch.tensor([self.pad_token] * dec_num_padding_tokens, dtype=torch.int64)
    ]
)
assert encoder_input.size(0) == self.seq_len
assert decoder_input.size(0) == self.seq_len
assert label.size(0) == self.seq_len
return {
    "encoder_input": encoder_input,  # (seq_len)
    "decoder_input": decoder_input,  # (seq_len)
    "encoder_mask": (encoder_input != self.pad_token).unsqueeze(0).unsqueeze(0).int(),  # (1, 1, seq_len)
    "decoder_mask": (decoder_input != self.pad_token).unsqueeze(0).unsqueeze(0).int() & casual_mask(decoder_input.size(0)),  # (1, 1, seq_len) & (1, seq_len, seq_len)
    "label": label,  # (seq_len)
    "src_text": src_text,
    "tgt_text": tgt_text
}
# mask out future positions so the decoder can only attend to the current and earlier tokens
def casual_mask(size):
    mask = torch.triu(torch.ones(1, size, size), diagonal=1).type(torch.int)
    return mask == 0
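As a quick sanity check (not part of the project files), the mask returned for size 4 keeps only the current and earlier positions:

from corpus import casual_mask

print(casual_mask(4))
# tensor([[[ True, False, False, False],
#          [ True,  True, False, False],
#          [ True,  True,  True, False],
#          [ True,  True,  True,  True]]])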
Model.py
# import libraries
import torch
import math
import torch.nn as nn
# d_model -> size of the embedding vector
# h -> number of heads
class InputEmbeddings(nn.Module):
def __init__(self, d_model: int, vocab_size: int):
super().__init__()
self.d_model = d_model
self.vocab_size = vocab_size
self.embedding = nn.Embedding(vocab_size, d_model)
def forward(self, x):
return self.embedding(x) * math.sqrt(self.d_model)
class PositionalEncoding(nn.Module):
def __init__(self, d_model: int, sen_len: int, dropout: float) -> None:
super().__init__()
self.d_model = d_model
self.sen_len = sen_len
self.dropout = nn.Dropout(dropout)
# create a matrix of shape (sen_len, d_model)
pe = torch.zeros(sen_len, d_model)
# create a vector of shape (sen_len)
position = torch.arange(0, sen_len, dtype=torch.float).unsqueeze(1) # shape: (sen_len, 1)
# formula
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
# apply sin to even positions
pe[:, 0::2] = torch.sin(position * div_term)
# apply cos to odd positions
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0) # shape: (1, sen_len, d_model)
self.register_buffer("pe", pe)
def forward(self, x):
x = x + (self.pe[:, :x.shape[1], :]).requires_grad_(False)
return self.dropout(x)
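A minimal shape check for the positional encoding, using the batch size 8, seq_len 350 and d_model 512 from the config:

import torch
from model import PositionalEncoding

pos = PositionalEncoding(d_model=512, sen_len=350, dropout=0.1)
x = torch.zeros(8, 350, 512)   # (batch, sen_len, d_model)
print(pos(x).shape)            # torch.Size([8, 350, 512])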
class LayerNormalization(nn.Module):
def __init__(self, eps: float = 10**-6) -> None:
super().__init__()
self.eps = eps
# alpha -> Multiplicative
self.alpha = nn.Parameter(torch.ones(1))
# beta -> Additive
self.beta = nn.Parameter(torch.zeros(1))
def forward(self, x):
mean = x.mean(dim = -1, keepdim=True)
std = x.std(dim = -1, keepdim=True)
return self.alpha * (x - mean) / (std + self.eps) + self.beta
class FeedForwardBlock(nn.Module):
def __init__(self, d_model: int, d_ff: int, dropout: float) -> None:
super().__init__()
self.linear_01 = nn.Linear(d_model, d_ff) # w1 and b1
self.dropout = nn.Dropout(dropout)
self.linear_02 = nn.Linear(d_ff, d_model) # w2 and b2
def forward(self, x):
# (Batch, sen_len, d_model) --> (Batch, sen_len, d_ff) --> (Batch, sen_len, d_model)
return self.linear_02(self.dropout(torch.relu(self.linear_01(x))))
class MultiHeadAttention(nn.Module):
def __init__(self, d_model: int, heads: int, dropout: float) -> None:
super().__init__()
self.d_model = d_model
self.heads = heads
assert d_model % heads == 0, "d_model is not divisible by heads"
self.d_k = d_model // heads
# set the query, key and value vector
self.w_q = nn.Linear(d_model, d_model) # w_q
self.w_k = nn.Linear(d_model, d_model) # w_k
self.w_v = nn.Linear(d_model, d_model) # w_v
# output
self.w_o = nn.Linear(d_model, d_model) # w_o
self.dropout = nn.Dropout(dropout)
@staticmethod
def Attention(query, key, value, mask, dropout: nn.Dropout):
d_k = query.shape[-1]
# (Batch, h, sen_len, d_k) --> (Batch, h, sen_len, sen_len)
attention_scores = (query @ key.transpose(-2, -1)) / math.sqrt(d_k)
if mask is not None:
attention_scores.masked_fill_(mask == 0, -1e9)
attention_scores = attention_scores.softmax(dim = -1) # (Batch, h, sen_len, sen_len)
if dropout is not None:
attention_scores = dropout(attention_scores)
return (attention_scores @ value), attention_scores
def forward(self, q, k, v, mask):
# (Batch, sen_len, d_model) --> (Batch, sen_len, d_model)
query = self.w_q(q)
key = self.w_k(k)
value = self.w_v(v)
# (Batch, sen_len, d_model) --> (Batch, sen_len, heads, d_k) --> (Batch, heads, sen_len, d_k)
query = query.view(query.shape[0], query.shape[1], self.heads, self.d_k).transpose(1, 2)
key = key.view(key.shape[0], key.shape[1], self.heads, self.d_k).transpose(1, 2)
value = value.view(value.shape[0], value.shape[1], self.heads, self.d_k).transpose(1, 2)
# call the attention mechanism
x, self.attention_scores = MultiHeadAttention.Attention(query, key, value, mask, self.dropout)
# (Batch, heads, sen_len, d_k) --> (Batch, sen_len, heads, d_k) --> (Batch, sen_len, d_model)
x = x.transpose(1, 2).contiguous().view(x.shape[0], -1, self.heads * self.d_k)
# (Batch, sen_len, d_model) --> (Batch, sen_len, d_model)
return self.w_o(x)
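A small usage sketch for the attention block with illustrative sizes (batch 2, sequence length 10); the all-ones mask means no position is masked out:

import torch
from model import MultiHeadAttention

mha = MultiHeadAttention(d_model=512, heads=8, dropout=0.1)
x = torch.randn(2, 10, 512)
mask = torch.ones(2, 1, 1, 10)   # no positions masked
print(mha(x, x, x, mask).shape)  # torch.Size([2, 10, 512])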
class ResidualConnection(nn.Module):
def __init__(self, dropout: float):
super().__init__()
self.dropout = nn.Dropout(dropout)
self.norm = LayerNormalization()
def forward(self, x, sublayer):
return x + self.dropout(sublayer(self.norm(x)))
class EncoderBlock(nn.Module):
def __init__(self, self_attention_block: MultiHeadAttention, feed_forward_network: FeedForwardBlock, dropout: float):
super().__init__()
self.self_attention_block = self_attention_block
self.feed_forward_block = feed_forward_network
self.residual_connection = nn.ModuleList([ResidualConnection(dropout) for _ in range(2)])
def forward(self, x, src_mask):
x = self.residual_connection[0](x, lambda x: self.self_attention_block(x, x, x, src_mask))
x = self.residual_connection[1](x, self.feed_forward_block)
return x
class Encoder(nn.Module):
def __init__(self, layers: nn.ModuleList):
super().__init__()
self.layers = layers
self.norm = LayerNormalization()
def forward(self, x, mask):
for layer in self.layers:
x = layer(x, mask)
return self.norm(x)
class DecoderBlock(nn.Module):
def __init__(self, self_attention_block: MultiHeadAttention, cross_attention_block: MultiHeadAttention, feed_forward_block: FeedForwardBlock, dropout: float):
super().__init__()
self.self_attention_block = self_attention_block
self.cross_attention_block = cross_attention_block
self.feed_forward_block = feed_forward_block
self.residual_connections = nn.ModuleList([ResidualConnection(dropout) for _ in range(3)])
def forward(self, x, encoder_output, src_mask, tgt_mask):
x = self.residual_connections[0](x, lambda x: self.self_attention_block(x, x, x, tgt_mask))
x = self.residual_connections[1](x, lambda x: self.cross_attention_block(x, encoder_output, encoder_output, src_mask))
x = self.residual_connections[2](x, self.feed_forward_block)
return x
class Decoder(nn.Module):
def __init__(self, layers: nn.ModuleList):
super().__init__()
self.layers = layers
self.norm = LayerNormalization()
def forward(self, x, encoder_output, src_mask, tgt_mask):
for layer in self.layers:
x = layer(x, encoder_output, src_mask, tgt_mask)
return self.norm(x)
class ProjectionLayer(nn.Module):
def __init__(self, d_model: int, vocab_size: int):
super().__init__()
self.linear = nn.Linear(d_model, vocab_size)
def forward(self, x):
# (Batch, sen_len, d_model) --> (Batch, sen_len, vocab_size)
return torch.log_softmax(self.linear(x), dim=-1)
class Transformer(nn.Module):
def __init__(self, encoder: Encoder, decoder: Decoder, src_embed: InputEmbeddings, tgt_embed: InputEmbeddings, src_pos: PositionalEncoding, tgt_pos: PositionalEncoding, proj: ProjectionLayer):
super().__init__()
self.encoder = encoder
self.decoder = decoder
self.src_embed = src_embed
self.tgt_embed = tgt_embed
self.src_pos = src_pos
self.tgt_pos = tgt_pos
self.projection = proj
def encode(self, src, src_mask):
src = self.src_embed(src)
src = self.src_pos(src)
return self.encoder(src, src_mask)
def decode(self, encoder_output, src_mask, tgt, tgt_mask):
tgt = self.tgt_embed(tgt)
tgt = self.tgt_pos(tgt)
return self.decoder(tgt, encoder_output, src_mask, tgt_mask)
def project(self, x):
return self.projection(x)
# build the transformer
# N -> number of encoder and decoder blocks
def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int, tgt_seq_len: int, d_model: int = 512, N: int = 6, heads: int = 8, dropout: float = 0.1, d_ff: int = 2048) -> Transformer:
# create the embedding layers
src_embed = InputEmbeddings(d_model, src_vocab_size)
tgt_embed = InputEmbeddings(d_model, tgt_vocab_size)
# create the positional encodings
src_pos = PositionalEncoding(d_model, src_seq_len, dropout)
tgt_pos = PositionalEncoding(d_model, tgt_seq_len, dropout)
# create the encoder blocks
encoder_blocks = []
for _ in range(N):
encoder_self_attention_block = MultiHeadAttention(d_model, heads, dropout)
feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)
encoder_block = EncoderBlock(encoder_self_attention_block, feed_forward_block, dropout)
encoder_blocks.append(encoder_block)
# create the decoder blocks
decoder_blocks = []
for _ in range(N):
decoder_self_attention_block = MultiHeadAttention(d_model, heads, dropout)
decoder_cross_attention_block = MultiHeadAttention(d_model, heads, dropout)
feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)
decoder_block = DecoderBlock(decoder_self_attention_block, decoder_cross_attention_block, feed_forward_block, dropout)
decoder_blocks.append(decoder_block)
# assemble the encoder and the decoder
encoder = Encoder(nn.ModuleList(encoder_blocks))
decoder = Decoder(nn.ModuleList(decoder_blocks))
# create the projection layer
projection_layer = ProjectionLayer(d_model, tgt_vocab_size)
# create the transformer
transformer = Transformer(encoder, decoder, src_embed, tgt_embed, src_pos, tgt_pos, projection_layer)
# Initialize the parameters
for p in transformer.parameters():
if p.dim() > 1:
nn.init.xavier_uniform_(p)
return transformer
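To make the wiring concrete, a minimal sketch of building the model; the vocabulary sizes below are illustrative placeholders, since the real values come from the trained tokenizers:

from model import build_transformer

model = build_transformer(src_vocab_size=15000, tgt_vocab_size=22000, src_seq_len=350, tgt_seq_len=350)
print(sum(p.numel() for p in model.parameters()))  # total number of trainable parameters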
Train.py
import torch
import warnings
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from datasets import load_dataset
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.trainers import WordLevelTrainer
from tokenizers.pre_tokenizers import Whitespace
from pathlib import Path
from corpus import BillingualDataset, casual_mask
from model import build_transformer
from config import get_weights_file_path, get_config
def get_all_sentences(dataset, lang):
for item in dataset:
yield item['translation'][lang]
# build the tokenizer
def get_or_build_tokenizer(config, dataset, lang):
# tokenizer path
tokenizer_path = Path(config["tokenizer_file"].format(lang))
if not tokenizer_path.exists():
    tokenizer = Tokenizer(WordLevel(unk_token='[UNK]'))
    tokenizer.pre_tokenizer = Whitespace()
    trainer = WordLevelTrainer(special_tokens=["[UNK]", "[PAD]", "[SOS]", "[EOS]"], min_frequency=2)
    tokenizer.train_from_iterator(get_all_sentences(dataset, lang), trainer=trainer)
    tokenizer.save(str(tokenizer_path))
else:
    tokenizer = Tokenizer.from_file(str(tokenizer_path))
return tokenizer
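Once built (or loaded from its JSON file), the tokenizer can be exercised directly; a minimal sketch, assuming this module is saved as train.py so it can be imported:

from datasets import load_dataset
from config import get_config
from train import get_or_build_tokenizer

config = get_config()
dataset_raw = load_dataset("opus_books", f"{config['lang_src']}-{config['lang_tgt']}", split="train")
tokenizer_src = get_or_build_tokenizer(config, dataset_raw, config["lang_src"])
ids = tokenizer_src.encode("How are you?").ids
print(ids, tokenizer_src.decode(ids))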
# get the dataset
def get_dataset(config):
dataset_raw = load_dataset("opus_books", f"{config['lang_src']}-{config['lang_tgt']}", split="train")
# build the tokenizer
tokenizer_src = get_or_build_tokenizer(config, dataset_raw, config["lang_src"])
tokenizer_tgt = get_or_build_tokenizer(config, dataset_raw, config["lang_tgt"])
# keep 90% of the data for training and 10% for validation
train_ds_size = int(0.9 * len(dataset_raw))
val_ds_size = len(dataset_raw) - train_ds_size
train_ds_raw, val_ds_raw = random_split(dataset_raw, [train_ds_size, val_ds_size])
train_ds = BillingualDataset(train_ds_raw, tokenizer_src, tokenizer_tgt, config["lang_src"], config["lang_tgt"], config["seq_len"])
val_ds = BillingualDataset(val_ds_raw, tokenizer_src, tokenizer_tgt, config["lang_src"], config["lang_tgt"], config["seq_len"])
max_len_src = 0
max_len_tgt = 0
for item in dataset_raw:
src_ids = tokenizer_src.encode(item["translation"][config["lang_src"]]).ids
tgt_ids = tokenizer_tgt.encode(item["translation"][config["lang_tgt"]]).ids
max_len_src = max(max_len_src, len(src_ids))
max_len_tgt = max(max_len_tgt, len(tgt_ids))
print(f"Max length of source sentence: {max_len_src}")
print(f"Max length of target sentence: {max_len_tgt}")
# create the data loaders
train_dataloader = DataLoader(
    train_ds,
    batch_size=config["batch_size"],
    shuffle=True
)
val_dataloader = DataLoader(
    val_ds,
    batch_size=1,
    shuffle=True
)
return train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt
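For orientation, a sketch of pulling one batch from the training loader and checking the tensor shapes (batch size 8 and seq_len 350 come from the config; this assumes the module is importable as train.py):

from config import get_config
from train import get_dataset

config = get_config()
train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt = get_dataset(config)
batch = next(iter(train_dataloader))
print(batch["encoder_input"].shape)  # torch.Size([8, 350])
print(batch["encoder_mask"].shape)   # torch.Size([8, 1, 1, 350])
print(batch["decoder_mask"].shape)   # torch.Size([8, 1, 350, 350])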
def get_model(config, vocab_src_len, vocab_tgt_len):
# build the model
model = build_transformer(vocab_src_len, vocab_tgt_len, config["seq_len"], config["seq_len"], config["d_model"])
return model
# the model training loop
def train_model(config):
# define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# create the model folder
Path(config["model_folder"]).mkdir(parents=True, exist_ok=True)
# get the data loaders
train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt = get_dataset(config)
# build the model using the tokenizer vocabulary sizes
model = get_model(config, tokenizer_src.get_vocab_size(), tokenizer_tgt.get_vocab_size())
# move the model to the device
model = model.to(device)
# Tensorboard
writer = SummaryWriter(config["experiment_name"])
# set the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], eps=1e-9)
initial_epoch = 0
global_step = 0
if config["preload"]:
model_filename = get_weights_file_path(config, config["preload"])
print(f"Preloading model: {model_filename}")
state = torch.load(model_filename)
intial_epoch = state["epoch"] + 1
optimizer.load_state_dict(state["optimizer_state_dict"])
global_step = state["global_step"]
# set the loss function
loss_fn = nn.CrossEntropyLoss(ignore_index=tokenizer_src.token_to_id("[PAD]"), label_smoothing=0.1).to(device)
for epoch in range(initial_epoch, config["num_epochs"]):
model.train()
batch_iterator = tqdm(train_dataloader, desc=f"Processing epoch {epoch:02d}")
for batch in batch_iterator:
encoder_input = batch["encoder_input"].to(device) # (Batch_size, seq_len)
decoder_input = batch["decoder_input"].to(device) # (Batch_size, seq_len)
encoder_mask = batch["encoder_mask"].to(device) # (Batch_size, 1, 1, seq_len)
decoder_mask = batch["decoder_mask"].to(device) # (Batch_size, 1, seq_len, seq_len)
# run the tensors through the transformer
encoder_output = model.encode(encoder_input, encoder_mask)  # (Batch_size, seq_len, d_model)
decoder_output = model.decode(encoder_output, encoder_mask, decoder_input, decoder_mask)  # (Batch_size, seq_len, d_model)
proj_output = model.project(decoder_output)  # (Batch_size, seq_len, tgt_vocab_size)
label = batch['label'].to(device) # (Batch, seq_len)
# (Batch_size, seq_len, tgt_vocab_size) --> (Batch_size * seq_len, tgt_vocab_size)
loss = loss_fn(proj_output.view(-1, tokenizer_tgt.get_vocab_size()), label.view(-1))
batch_iterator.set_postfix({"loss": f"{loss.item():6.3f}"})
# log the loss
writer.add_scalar("train loss", loss.item(), global_step)
writer.flush()
# backpropagate the loss
loss.backward()
# update the weights
optimizer.step()
optimizer.zero_grad()
global_step += 1
# save the model
model_filename = get_weights_file_path(config, f"{epoch:02d}")
torch.save(
    {
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "global_step": global_step
    },
    model_filename
)
if __name__ == "__main__":
warnings.filterwarnings("ignore")
config = get_config()
train_model(config)
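After training, a saved checkpoint can be restored from the keys written above; a minimal sketch, assuming the final epoch (19) was saved and the modules are importable as config.py and train.py:

import torch
from config import get_config, get_weights_file_path
from train import get_model, get_dataset

config = get_config()
_, _, tokenizer_src, tokenizer_tgt = get_dataset(config)
model = get_model(config, tokenizer_src.get_vocab_size(), tokenizer_tgt.get_vocab_size())
state = torch.load(get_weights_file_path(config, "19"), map_location="cpu")
model.load_state_dict(state["model_state_dict"])
model.eval()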
Project Structure