Fix logs and model training and deployment
Browse files
- app/tasks/inference.py +2 -2
- app/tasks/training.py +11 -8
app/tasks/inference.py
CHANGED
@@ -8,7 +8,7 @@ import time
|
|
8 |
from scipy.special import softmax
|
9 |
|
10 |
# HuggingFace Model to be used for inferencing
|
11 |
-
MODEL = "gpicciuca/ [removed line truncated in page extraction — full old value not recoverable]
|
12 |
# MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
|
13 |
|
14 |
class InferenceTask:
|
@@ -37,7 +37,7 @@ class InferenceTask:
|
|
37 |
self.__model = AutoModelForSequenceClassification.from_pretrained(MODEL)
|
38 |
self.__is_loaded = True
|
39 |
except Exception as ex:
|
40 |
-
logger.error("Failed to load inference model: {ex}")
|
41 |
self.clear()
|
42 |
return False
|
43 |
|
|
|
8 |
from scipy.special import softmax
|
9 |
|
10 |
# HuggingFace Model to be used for inferencing
|
11 |
+
MODEL = "gpicciuca/sentiment_trainer"
|
12 |
# MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
|
13 |
|
14 |
class InferenceTask:
|
|
|
37 |
self.__model = AutoModelForSequenceClassification.from_pretrained(MODEL)
|
38 |
self.__is_loaded = True
|
39 |
except Exception as ex:
|
40 |
+
logger.error(f"Failed to load inference model: {ex}")
|
41 |
self.clear()
|
42 |
return False
|
43 |
|
app/tasks/training.py
CHANGED
@@ -20,9 +20,9 @@ import time
|
|
20 |
|
21 |
|
22 |
# MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
|
23 |
-
MODEL = "gpicciuca/twitter-roberta-base-sentiment-latest"
|
24 |
DATASET = "zeroshot/twitter-financial-news-sentiment"
|
25 |
-
|
|
|
26 |
|
27 |
RNG_SEED = 22
|
28 |
|
@@ -162,8 +162,8 @@ class TrainingTask:
|
|
162 |
self.__test_dataset = dataset_train_test["test"]
|
163 |
|
164 |
# Swap labels so that they match what the model actually expects
|
165 |
-
# The model expects {0: [removed comment truncated in page extraction]
|
166 |
-
# But the dataset uses {0: [removed comment truncated in page extraction]
|
167 |
# So here we just flip 1<->2 to remain consistent
|
168 |
def label_filter(row):
|
169 |
row["label"] = { 0: 0, 1: 2, 2: 1 }[row["label"]]
|
@@ -204,8 +204,8 @@ class TrainingTask:
|
|
204 |
Loads the model from the repository
|
205 |
"""
|
206 |
# Set the mapping between int label and its meaning.
|
207 |
-
id2label = {0: " [removed line truncated in page extraction — old label mapping not recoverable]
|
208 |
-
label2id = {" [removed line truncated in page extraction — old label mapping not recoverable]
|
209 |
|
210 |
# Acquire the model from the Hugging Face Hub, providing label and id mappings so that both we and the model can 'speak' the same language.
|
211 |
self.__model = AutoModelForSequenceClassification.from_pretrained(
|
@@ -251,7 +251,7 @@ class TrainingTask:
|
|
251 |
return metric.compute(predictions=predictions, references=labels)
|
252 |
|
253 |
# Checkpoints will be output to this `training_output_dir`.
|
254 |
-
training_output_dir = " [removed line truncated in page extraction — old output path not recoverable]
|
255 |
training_args = TrainingArguments(
|
256 |
output_dir=training_output_dir,
|
257 |
eval_strategy="epoch",
|
@@ -310,10 +310,13 @@ class TrainingTask:
|
|
310 |
"""
|
311 |
Uploads the fine-tuned model to HuggingFace
|
312 |
"""
|
313 |
-
|
|
|
|
|
314 |
|
315 |
def __reload_inference_model(self):
|
316 |
"""
|
317 |
Reloads the model used by the Inference class.
|
318 |
"""
|
|
|
319 |
infer_task.load_model()
|
|
|
20 |
|
21 |
|
22 |
# MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
|
|
|
23 |
DATASET = "zeroshot/twitter-financial-news-sentiment"
|
24 |
+
MODEL = "gpicciuca/sentiment_trainer"
|
25 |
+
HF_REPO = "gpicciuca/sentiment_trainer"
|
26 |
|
27 |
RNG_SEED = 22
|
28 |
|
|
|
162 |
self.__test_dataset = dataset_train_test["test"]
|
163 |
|
164 |
# Swap labels so that they match what the model actually expects
|
165 |
+
# The model expects {0: negative, 1: neutral, 2: positive}
|
166 |
+
# But the dataset uses {0: negative, 1: positive, 2: neutral}
|
167 |
# So here we just flip 1<->2 to remain consistent
|
168 |
def label_filter(row):
|
169 |
row["label"] = { 0: 0, 1: 2, 2: 1 }[row["label"]]
|
|
|
204 |
Loads the model from the repository
|
205 |
"""
|
206 |
# Set the mapping between int label and its meaning.
|
207 |
+
id2label = {0: "negative", 1: "neutral", 2: "positive"}
|
208 |
+
label2id = {"negative": 0, "neutral": 1, "positive": 2}
|
209 |
|
210 |
# Acquire the model from the Hugging Face Hub, providing label and id mappings so that both we and the model can 'speak' the same language.
|
211 |
self.__model = AutoModelForSequenceClassification.from_pretrained(
|
|
|
251 |
return metric.compute(predictions=predictions, references=labels)
|
252 |
|
253 |
# Checkpoints will be output to this `training_output_dir`.
|
254 |
+
training_output_dir = "/tmp/sentiment_trainer"
|
255 |
training_args = TrainingArguments(
|
256 |
output_dir=training_output_dir,
|
257 |
eval_strategy="epoch",
|
|
|
310 |
"""
|
311 |
Uploads the fine-tuned model to HuggingFace
|
312 |
"""
|
313 |
+
logger.info("Deploying Model and Tokenizer to HuggingFace")
|
314 |
+
self.__trainer.push_to_hub(HF_REPO)
|
315 |
+
self.__tokenizer.push_to_hub(HF_REPO)
|
316 |
|
317 |
def __reload_inference_model(self):
|
318 |
"""
|
319 |
Reloads the model used by the Inference class.
|
320 |
"""
|
321 |
+
logger.info("Reloading inference model")
|
322 |
infer_task.load_model()
|