Lord-Raven committed · Commit c44bdaf · Parent(s): 39080c2
Experimenting with few-shot classification.
app.py CHANGED
@@ -60,20 +60,20 @@ class OnnxSetFitModel:
 # "Xenova/distilbert-base-uncased-mnli" "typeform/distilbert-base-uncased-mnli" Bad answers
 # "Xenova/deBERTa-v3-base-mnli" "MoritzLaurer/DeBERTa-v3-base-mnli" Still a bit slow and not great answers
 # "xenova/nli-deberta-v3-small" "cross-encoder/nli-deberta-v3-small" Was using this for a good while and it was...okay
-model_name = "
-file_name = "onnx/
-tokenizer_name = "MoritzLaurer/
+model_name = "MoritzLaurer/bge-m3-zeroshot-v2.0"
+file_name = "onnx/model.onnx"
+tokenizer_name = "MoritzLaurer/bge-m3-zeroshot-v2.0"
 model = ORTModelForSequenceClassification.from_pretrained(model_name, file_name=file_name)
 tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, model_max_length=512)
 classifier = pipeline(task="zero-shot-classification", model=model, tokenizer=tokenizer)
 
-few_shot_tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5'
-ort_model = ORTModelForFeatureExtraction.from_pretrained('BAAI/bge-
-few_shot_model = SetFitModel.from_pretrained("moshew/bge-small-en-v1.5_setfit-sst2-english"
+few_shot_tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-m3', model_max_length=512) # 'BAAI/bge-small-en-v1.5'
+ort_model = ORTModelForFeatureExtraction.from_pretrained('BAAI/bge-m3', file_name="onnx/model.onnx") # 'BAAI/bge-small-en-v1.5'
+few_shot_model = SetFitModel.from_pretrained("BAAI/bge-m3") # "moshew/bge-small-en-v1.5_setfit-sst2-english"
 
 # Train few_shot_model
 candidate_labels = ["correct", "wrong"]
-reference_dataset = load_dataset("
+reference_dataset = load_dataset("SetFit/sst2")
 dummy_dataset = Dataset.from_dict({})
 train_dataset = get_templated_dataset(dummy_dataset, candidate_labels=candidate_labels, sample_size=8, template="This conclusion is {}.")
 args = TrainingArguments(
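
A quick usage sketch for the zero-shot side of this change: the pipeline built above is normally called with candidate labels and a hypothesis template. The premise text, labels, and template here are illustrative assumptions, not values taken from app.py.

# Hypothetical call against the classifier pipeline defined above.
result = classifier(
    "The knight picked up the sword before entering the cave.",  # assumed premise
    candidate_labels=["correct", "wrong"],
    hypothesis_template="This conclusion is {}.",  # reuses the few-shot template as an assumption
)
print(result["labels"][0], result["scores"][0])  # top-ranked label and its score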
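
The diff cuts off at "args = TrainingArguments(". As a hedged sketch (not the actual continuation of app.py), a SetFit few-shot training loop built from the pieces shown above would typically proceed as follows; the TrainingArguments values and the Trainer call are assumptions.

from datasets import Dataset
from setfit import SetFitModel, Trainer, TrainingArguments, get_templated_dataset

candidate_labels = ["correct", "wrong"]
# Seed an empty dataset with templated few-shot examples, as in the diff above.
train_dataset = get_templated_dataset(
    Dataset.from_dict({}),
    candidate_labels=candidate_labels,
    sample_size=8,
    template="This conclusion is {}.",
)

few_shot_model = SetFitModel.from_pretrained("BAAI/bge-m3")

# Assumed hyperparameters; the real values are cut off in the diff.
args = TrainingArguments(batch_size=8, num_epochs=1)
trainer = Trainer(model=few_shot_model, args=args, train_dataset=train_dataset)
trainer.train()

# Predictions come back as the candidate labels ("correct" / "wrong").
print(few_shot_model.predict(["This conclusion is correct."]))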