スポンサーリンク
以前に物体検出タスクにて自己教師あり学習をやってみました
technoxs-stacker.hatenablog.com
今回は自己教師あり学習の結果を使って再学習する流れをやってみました
この記事は備忘録です
目次
スポンサーリンク
この記事でわかること
物体検出タスクのDNNにて自己教師あり学習の結果を使って再学習する際のサンプルコード
1.実行環境
Google Colab
ultralytics
lightly
1.1 モジュールインストール
!pip install -q lightly
!pip install -q ultralytics
import copy
import torch
import torchvision
from torch import nn
from lightly.loss import DINOLoss
from lightly.models.modules import DINOProjectionHead
from lightly.models.utils import deactivate_requires_grad, update_momentum
from lightly.transforms.dino_transform import DINOTransform
from lightly.utils.scheduler import cosine_schedule
from tqdm import tqdm
from ultralytics.nn.modules import Conv
import os
class DINO(torch.nn.Module):
    """Student/teacher pair for DINO self-supervised pretraining.

    The student (backbone + projection head) is trained by gradient descent;
    the teacher is a deep copy whose gradients are disabled — it is expected
    to be updated via an EMA of the student (see the training loop).
    """

    def __init__(self, backbone, input_dim):
        super().__init__()
        # Trainable student branch.
        self.student_backbone = backbone
        self.student_head = DINOProjectionHead(
            input_dim, 512, 64, 2048, freeze_last_layer=1
        )
        # Teacher branch: an independent copy, never updated by backprop.
        self.teacher_backbone = copy.deepcopy(backbone)
        self.teacher_head = DINOProjectionHead(input_dim, 512, 64, 2048)
        for frozen in (self.teacher_backbone, self.teacher_head):
            deactivate_requires_grad(frozen)

    def forward(self, x):
        """Project an input batch through the student branch."""
        features = self.student_backbone(x).flatten(start_dim=1)
        return self.student_head(features)

    def forward_teacher(self, x):
        """Project an input batch through the (gradient-free) teacher branch."""
        features = self.teacher_backbone(x).flatten(start_dim=1)
        return self.teacher_head(features)
class PoolHead(nn.Module):
    """Replacement head for a YOLO backbone: 1x1 conv to 1280 channels
    followed by global average pooling, producing one vector per image.

    The ``f`` and ``i`` attributes mirror the YOLO layer protocol so this
    module can be slotted into ``yolo.model.model`` in place of an
    existing layer.
    """

    def __init__(self, f: int, i: int, c1: int):  # type hints kept for scripting
        super().__init__()
        self.f = f  # receive the outputs from these layers
        self.i = i  # layer number
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv = Conv(c1, 1280, 1, 1, None, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:  # type hints kept for scripting
        projected = self.conv(x)
        return self.avgpool(projected)
def target_transform(t):
    """Drop the VOC detection annotation.

    Self-supervised pretraining needs no labels, so every target is mapped
    to the constant 0 regardless of its content.
    """
    return 0
import configparser
def main(model_name="yolov5n.pt", epochs=10, batch_size=256):
    """Pretrain a YOLO backbone with DINO self-supervised learning on Pascal VOC.

    Args:
        model_name: Ultralytics checkpoint whose backbone is pretrained.
            (Bug fix: this parameter was previously overwritten by a
            hard-coded assignment inside the function.)
        epochs: number of DINO pretraining epochs.
        batch_size: dataloader batch size.

    Returns:
        0 on success. Side effect: saves the pretrained weights to
        "pretrained.pt" (and downloads Pascal VOC to datasets/pascal_voc).
    """
    from ultralytics import YOLO  # Moved import inside the function

    GLOBAL_CROP_SIZE = 224
    LOCAL_CROP_SIZE = 96
    backbone_layer_num = 12

    # load base model
    yolo = YOLO(model_name)
    print("base model-----------------------------------------------------------------------------")
    print(yolo)

    # Only backbone: keep the first n layers of the detection model.
    yolo.model.model = yolo.model.model[:backbone_layer_num]
    print("backbone model-----------------------------------------------------------------------------")
    print(yolo)

    dummy = torch.rand(2, 3, GLOBAL_CROP_SIZE, GLOBAL_CROP_SIZE)
    # Run the forward pass only up to the layer before the final Upsample:
    # that layer is about to be replaced, so we need the shape of its input
    # to size the replacement head's 1x1 convolution.
    out = yolo.model.model[:-1](dummy)
    yolo.model.model[-1] = PoolHead(
        yolo.model.model[-1].f, yolo.model.model[-1].i, out.shape[1]
    )  # Replace the nth layer with PoolHead

    out = yolo.model(dummy)
    input_dim = out.flatten(start_dim=1).shape[1]

    backbone = yolo.model.requires_grad_()
    backbone.train()
    model = DINO(backbone, input_dim)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    # YOLO uses these values (identity normalization over 0-1 pixel values).
    normalize = dict(mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0))
    transform = DINOTransform(
        global_crop_size=GLOBAL_CROP_SIZE,
        local_crop_size=LOCAL_CROP_SIZE,
        normalize=normalize,
    )
    dataset = torchvision.datasets.VOCDetection(
        "datasets/pascal_voc",
        download=True,
        transform=transform,
        target_transform=target_transform,  # labels are discarded for SSL
    )
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=os.cpu_count(),
        pin_memory=True,
    )

    criterion = DINOLoss(
        output_dim=2048,
        warmup_teacher_temp_epochs=5,
    )
    # move loss to correct device because it also contains parameters
    criterion = criterion.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    print("Starting Training")
    for epoch in range(epochs):
        total_loss = 0
        # EMA momentum ramps from 0.996 toward 1.0 over the run.
        momentum_val = cosine_schedule(epoch, epochs, 0.996, 1)
        # Initialize the progress bar
        pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}", unit="batch")
        for batch in pbar:
            views = batch[0]
            update_momentum(model.student_backbone, model.teacher_backbone, m=momentum_val)
            update_momentum(model.student_head, model.teacher_head, m=momentum_val)
            views = [view.to(device) for view in views]
            # The teacher only sees the two global crops; the student sees all views.
            global_views = views[:2]
            teacher_out = [model.forward_teacher(view) for view in global_views]
            student_out = [model.forward(view) for view in views]
            loss = criterion(teacher_out, student_out, epoch=epoch)
            total_loss += loss.detach()
            loss.backward()
            # We only cancel gradients of student head.
            model.student_head.cancel_last_layer_gradients(current_epoch=epoch)
            optimizer.step()
            optimizer.zero_grad()
            # Update the progress bar with the current batch loss
            pbar.set_postfix(loss=loss.item())
        avg_loss = total_loss / len(dataloader)
        print(f"Epoch: {epoch + 1}, Loss: {avg_loss:.5f}")

    # Load and save pretrained backbone: transfer the student weights into a
    # fresh copy of the same architecture, then save for later fine-tuning.
    yolo = YOLO(model_name)
    yolo.model.load(model.student_backbone)
    yolo.save("pretrained.pt")
    return 0
main()
4. 自己教師あり学習の結果を使って再学習
# Clone the YOLOv5 repository
!git clone https://github.com/ultralytics/yolov5
# Navigate to the cloned directory
%cd yolov5
# Install required packages
!pip install -r requirements.txt
!python train.py --data VOC.yaml --cache --epochs 3 --weights "/content/pretrained.pt" --cfg "/content/yolov5/models/yolov5n.yaml"
所感
まだまだ改良の余地ありますが、とりあえず自己教師あり学習から再学習までの流れを構築できました
スポンサーリンク
参考
colab.research.google.com