File size: 3,181 Bytes
af98023 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd 986acc0 c3a2cbd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# model_loader.py
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import torch
import os
class ClassifierModel:
    """
    Container for the toxicity classifier and its tokenizer.

    Creating an instance picks the compute device and loads the model
    right away.
    """

    def __init__(self):
        # Use the GPU when torch can see one, otherwise stay on the CPU.
        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)
        self.model = None
        self.tokenizer = None
        self.load_classifier_model()

    def load_classifier_model(self):
        """
        Fetch the XLM-RoBERTa toxicity classifier and its tokenizer.

        The model is moved to ``self.device`` and put into eval mode.
        Any loading error is printed and then re-raised unchanged.
        """
        try:
            checkpoint = "JanviMl/xlm-roberta-toxic-classifier-capstone"
            print(f"Loading classifier model: {checkpoint}")
            classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint)
            self.model = classifier
            self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
            # Inference only: place on the target device, then switch to eval mode.
            classifier.to(self.device)
            classifier.eval()
            print("Classifier model loaded successfully")
        except Exception as exc:
            print(f"Error loading classifier model: {exc}")
            raise
# Shared module-level instance. ClassifierModel.__init__ calls
# load_classifier_model(), so the classifier is loaded when this module is imported.
classifier_model = ClassifierModel()
class ParaphraserModel:
    """
    Container for the Granite causal language model and tokenizer used for paraphrasing.

    Creating an instance selects CUDA when available (CPU otherwise) and
    loads the model immediately.
    """

    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.load_paraphraser_model()

    def load_paraphraser_model(self):
        """
        Load the Granite 3.2-2B-Instruct model and tokenizer for paraphrasing.

        If the tokenizer has no pad token, or its pad token equals its EOS
        token, "<pad>" is registered as a dedicated pad token and the model
        config is pointed at its id. The model is then moved to
        ``self.device`` and put into eval mode.

        Raises:
            Exception: Any error raised during loading is printed and
                re-raised unchanged.
        """
        try:
            model_name = "ibm-granite/granite-3.2-2b-instruct"
            print(f"Loading paraphraser model: {model_name}")
            self.model = AutoModelForCausalLM.from_pretrained(model_name)
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            # Set a distinct pad token to avoid conflict with eos token
            if self.tokenizer.pad_token is None or self.tokenizer.pad_token == self.tokenizer.eos_token:
                # add_special_tokens() inserts "<pad>" into the vocabulary when it
                # is missing. Plain attribute assignment does not, which would
                # leave the pad id resolving to the unknown-token id (or None).
                self.tokenizer.add_special_tokens({"pad_token": "<pad>"})
                # Grow the embedding matrix only if the vocabulary is now larger
                # than it; never shrink a matrix that was padded on purpose.
                embedding_rows = self.model.get_input_embeddings().weight.shape[0]
                if len(self.tokenizer) > embedding_rows:
                    self.model.resize_token_embeddings(len(self.tokenizer))
                self.model.config.pad_token_id = self.tokenizer.pad_token_id
            self.model.to(self.device)
            self.model.eval()
            print("Paraphraser model loaded successfully")
        except Exception as e:
            print(f"Error loading paraphraser model: {str(e)}")
            raise
# Shared module-level instance. ParaphraserModel.__init__ calls
# load_paraphraser_model(), so the paraphraser is loaded when this module is imported.
paraphraser_model = ParaphraserModel()
class MetricsModels:
    """
    Container for the models used to compute evaluation metrics.

    Creating an instance picks the compute device and loads Sentence-BERT
    right away.
    """

    def __init__(self):
        # Use the GPU when torch can see one, otherwise stay on the CPU.
        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)
        self.sentence_bert = None
        self.load_sentence_bert()

    def load_sentence_bert(self):
        """
        Fetch the Sentence-BERT encoder used for semantic similarity.

        The encoder is created on ``self.device``. Any loading error is
        printed and then re-raised unchanged.
        """
        try:
            checkpoint = "sentence-transformers/all-MiniLM-L6-v2"
            print(f"Loading Sentence-BERT model: {checkpoint}")
            encoder = SentenceTransformer(checkpoint, device=self.device)
            self.sentence_bert = encoder
            print("Sentence-BERT model loaded successfully")
        except Exception as exc:
            print(f"Error loading Sentence-BERT model: {exc}")
            raise
# Shared module-level instance. MetricsModels.__init__ calls
# load_sentence_bert(), so Sentence-BERT is loaded when this module is imported.
metrics_models = MetricsModels()