Spaces:

Ravenok
/

statosphere-backend

Running on Zero

App Files Files Community

Lord-Raven committed on Aug 27, 2024

Commit

fbcdba4

1 Parent(s): bd9a53f

Experimenting with few-shot classification.

Browse files

Files changed (1) hide show

app.py +32 -2

app.py CHANGED Viewed

@@ -7,8 +7,10 @@ from optimum.onnxruntime import ORTModelForSequenceClassification
 from optimum.onnxruntime import ORTModelForFeatureExtraction
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from setfit import SetFitModel
 from setfit.exporters.utils import mean_pooling
 # CORS Config
 app = FastAPI()
@@ -55,9 +57,36 @@ classifier = pipeline(task="zero-shot-classification", model=model, tokenizer=to
 # Few-shot path: the tokenizer and the ONNX feature-extraction model are both
 # loaded from the 'BAAI/bge-small-en-v1.5' checkpoint.
 few_shot_tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5', model_max_length=512)
 ort_model = ORTModelForFeatureExtraction.from_pretrained('BAAI/bge-small-en-v1.5', file_name="onnx/model.onnx")
-few_shot_model = SetFitModel.from_pretrained("moshew/bge-small-en-v1.5_setfit-sst2-english")
 # The wrapper is built from the ONNX model, the tokenizer, and the SetFit
 # model's head (OnnxSetFitModel itself is defined outside this excerpt).
 onnx_few_shot_model = OnnxSetFitModel(ort_model, few_shot_tokenizer, few_shot_model.model_head)
 def classify(data_string, request: gradio.Request):
     if request:
         if request.headers["origin"] not in ["https://statosphere-3704059fdd7e.c5v4v4jx6pq5.win", "https://crunchatize-77a78ffcc6a6.c5v4v4jx6pq5.win", "https://ravenok-statosphere-backend.hf.space"]:
@@ -75,6 +104,7 @@ def zero_shot_classification(data):
 def few_shot_classification(data):
     """Run the ONNX few-shot model on ``data['sequence']``.

     Returns the model output, converted with ``.tolist()``, as a JSON string.
     """
     predictions = onnx_few_shot_model(data['sequence'])
     return json.dumps(predictions.tolist())

 from optimum.onnxruntime import ORTModelForFeatureExtraction
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
+from setfit import SetFitModel, Trainer, TrainingArguments
 from setfit.exporters.utils import mean_pooling
+from setfit import get_templated_dataset
+from datasets import load_dataset
 # CORS Config
 app = FastAPI()
 few_shot_tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5', model_max_length=512)
 ort_model = ORTModelForFeatureExtraction.from_pretrained('BAAI/bge-small-en-v1.5', file_name="onnx/model.onnx")
+few_shot_model = SetFitModel.from_pretrained("moshew/bge-small-en-v1.5_setfit-sst2-english", multi_target_strategy="multi-output")
+test_dataset = load_dataset("dair-ai/emotion", "split", split="test")
+print(test_dataset)
+classes = test_dataset.features["label"].names
+print(classes)
+train_dataset = get_templated_dataset()
+print(train_dataset)
+print(train_dataset[0])
+args = TrainingArguments(
+    batch_size=32,
+    num_epochs=1
+)
+trainer = Trainer(
+    model=few_shot_model,
+    args=args,
+    train_dataset=train_dataset,
+    eval_dataset=test_dataset
+)
+trainer.train()
+metrics = trainer.evaluate()
+print(metrics)
 onnx_few_shot_model = OnnxSetFitModel(ort_model, few_shot_tokenizer, few_shot_model.model_head)
 def classify(data_string, request: gradio.Request):
     if request:
         if request.headers["origin"] not in ["https://statosphere-3704059fdd7e.c5v4v4jx6pq5.win", "https://crunchatize-77a78ffcc6a6.c5v4v4jx6pq5.win", "https://ravenok-statosphere-backend.hf.space"]:
 def few_shot_classification(data):
     results = onnx_few_shot_model(data['sequence'])
+    print([classes[idx] for idx in results)
     response_string = json.dumps(results.tolist())
     return response_string