File size: 1,999 Bytes
af98023 c3a2cbd 0d6bc62 c3a2cbd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# model_loader.py
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
# Classifier Model (XLM-RoBERTa for toxicity classification)
class ClassifierModel:
    """Holder for the fine-tuned XLM-RoBERTa toxicity classifier and its tokenizer.

    Constructing an instance immediately calls ``load_model``, so creation
    fails if either artifact cannot be loaded.
    """

    def __init__(self) -> None:
        # Both attributes stay None until load_model() has fully succeeded.
        self.model = None
        self.tokenizer = None
        self.load_model()

    def load_model(self) -> None:
        """Load the fine-tuned XLM-RoBERTa model and tokenizer for toxic comment classification.

        Both artifacts are loaded into locals first and only then stored on
        the instance, so a failure never leaves a model without a tokenizer.

        Raises:
            RuntimeError: If the model or tokenizer cannot be loaded. The
                underlying error is attached as ``__cause__``.
        """
        model_name = "JanviMl/xlm-roberta-toxic-classifier-capstone"
        try:
            model = AutoModelForSequenceClassification.from_pretrained(model_name)
            # The slow tokenizer is requested explicitly (use_fast=False).
            tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
        except Exception as e:
            # Broad catch is deliberate: this is a boundary that re-raises
            # with context, keeping the original error chained.
            raise RuntimeError(
                f"Error loading classifier model or tokenizer: {e}"
            ) from e
        self.model = model
        self.tokenizer = tokenizer
# Paraphraser Model (Granite 3.2-2B-Instruct for paraphrasing)
class ParaphraserModel:
    """Holder for the Granite 3.2-2B-Instruct causal LM and its tokenizer.

    Constructing an instance immediately calls ``load_model``, so creation
    fails if either artifact cannot be loaded.
    """

    def __init__(self) -> None:
        # Both attributes stay None until load_model() has fully succeeded.
        self.model = None
        self.tokenizer = None
        self.load_model()

    def load_model(self) -> None:
        """Load the Granite 3.2-2B-Instruct model and tokenizer for paraphrasing.

        Both artifacts are loaded into locals first and only then stored on
        the instance, so a failure never leaves a model without a tokenizer.

        Raises:
            RuntimeError: If the model or tokenizer cannot be loaded. The
                underlying error is attached as ``__cause__``.
        """
        model_name = "ibm-granite/granite-3.2-2b-instruct"
        try:
            model = AutoModelForCausalLM.from_pretrained(model_name)
            tokenizer = AutoTokenizer.from_pretrained(model_name)
        except Exception as e:
            # Broad catch is deliberate: this is a boundary that re-raises
            # with context, keeping the original error chained.
            raise RuntimeError(
                f"Error loading paraphrase model or tokenizer: {e}"
            ) from e
        self.model = model
        self.tokenizer = tokenizer
# Metrics Models (Sentence-BERT only)
class MetricsModels:
    """Lazily loaded models used for metrics (Sentence-BERT only).

    Nothing is loaded at construction time; the Sentence-BERT model is
    created on the first call to ``load_sentence_bert`` and reused afterwards.
    """

    def __init__(self, sentence_bert_name: str = "all-MiniLM-L6-v2") -> None:
        """Create an empty holder.

        Args:
            sentence_bert_name: Identifier passed to ``SentenceTransformer``
                on first load. Defaults to the checkpoint that was previously
                hard-coded.
        """
        self.sentence_bert_name = sentence_bert_name
        # Populated on demand by load_sentence_bert().
        self.sentence_bert_model = None

    def load_sentence_bert(self):
        """Return the Sentence-BERT model, creating it on first use.

        Returns:
            The cached ``SentenceTransformer`` instance.
        """
        if self.sentence_bert_model is None:
            self.sentence_bert_model = SentenceTransformer(self.sentence_bert_name)
        return self.sentence_bert_model
# Singleton instances.
# NOTE: ClassifierModel() and ParaphraserModel() call load_model() from their
# constructors, so importing this module loads both models eagerly.
# MetricsModels() defers its load until load_sentence_bert() is called.
classifier_model = ClassifierModel()
paraphraser_model = ParaphraserModel()
metrics_models = MetricsModels()