# utils/model_utils.py
import torch
import json
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from utils.normalizer import normalize_bengali_english # ✅ use shared normalizer
# Load model & tokenizer once at import time so every prediction reuses them.
model_dir = "bert-emotion-model"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
model.eval()  # evaluation mode: disables dropout and similar training-only layers

# Load label classes.
# JSON is read explicitly as UTF-8 so label names decode the same way on every
# platform instead of depending on the locale's default encoding.
# NOTE(review): labels.json is presumably a list of label names ordered by
# class index — confirm against the training script.
with open(os.path.join(model_dir, "labels.json"), encoding="utf-8") as f:
    labels = json.load(f)

# Map each class index to its label name.
id2label = dict(enumerate(labels))
# Emotion prediction using BERT
def predict_emotion(text):
    """Return the emotion label predicted for *text*.

    The text is first passed through ``normalize_bengali_english``, then
    tokenized into PyTorch tensors (with truncation and padding enabled) and
    fed to the module-level classification model with gradient tracking
    disabled. The index of the largest logit is looked up in ``id2label``.

    ``argmax()`` is taken over the whole logits tensor, so this presumably
    expects a single input string rather than a batch — confirm with callers.
    """
    cleaned = normalize_bengali_english(text)
    encoded = tokenizer(
        cleaned,
        return_tensors="pt",
        truncation=True,
        padding=True,
    )

    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        output = model(**encoded)

    best_index = output.logits.argmax().item()
    return id2label[best_index]