Lord-Raven committed on
Commit
6df156f
·
1 Parent(s): 16e4efd

Experimenting with few-shot classification.

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -59,15 +59,14 @@ few_shot_tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5', mod
59
  ort_model = ORTModelForFeatureExtraction.from_pretrained('BAAI/bge-small-en-v1.5', file_name="onnx/model.onnx")
60
  few_shot_model = SetFitModel.from_pretrained("moshew/bge-small-en-v1.5_setfit-sst2-english", multi_target_strategy="multi-output")
61
 
62
- def get_templated_dataset(candidate_labels, sample_size, template="This sentence is {}"):
63
- examples = []
64
- for label in candidate_labels:
65
- for _ in range(sample_size):
66
- examples.append({"text": template.format(label), "label": label})
67
- return examples
68
 
69
  candidate_labels = ["true", "false"]
70
- synthetic_dataset = get_templated_dataset(candidate_labels=candidate_labels, sample_size=8, template="This statement is {}")
 
 
 
 
71
 
72
  args = TrainingArguments(
73
  batch_size=32,
@@ -76,8 +75,8 @@ args = TrainingArguments(
76
 
77
  trainer = Trainer(
78
  model=few_shot_model,
79
- args=args,
80
- train_dataset=synthetic_dataset
81
  )
82
  trainer.train()
83
 
 
59
  ort_model = ORTModelForFeatureExtraction.from_pretrained('BAAI/bge-small-en-v1.5', file_name="onnx/model.onnx")
60
  few_shot_model = SetFitModel.from_pretrained("moshew/bge-small-en-v1.5_setfit-sst2-english", multi_target_strategy="multi-output")
61
 
62
+
 
 
 
 
 
63
 
64
  candidate_labels = ["true", "false"]
65
+ reference_dataset = load_dataset("emotion")
66
+ dummy_dataset = Dataset.from_dict({})
67
+ train_dataset = get_templated_dataset(dummy_dataset, candidate_labels=candidate_labels, sample_size=8, template="This statement is {}")
68
+
69
+
70
 
71
  args = TrainingArguments(
72
  batch_size=32,
 
75
 
76
  trainer = Trainer(
77
  model=few_shot_model,
78
+ train_dataset=train_dataset,
79
+ eval_dataset=reference_dataset["test"]
80
  )
81
  trainer.train()
82