Lord-Raven committed on
Commit
a6ec990
·
1 Parent(s): 057b382

Experimenting with few-shot classification.

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -59,24 +59,25 @@ few_shot_tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5', mod
59
  ort_model = ORTModelForFeatureExtraction.from_pretrained('BAAI/bge-small-en-v1.5', file_name="onnx/model.onnx")
60
  few_shot_model = SetFitModel.from_pretrained("moshew/bge-small-en-v1.5_setfit-sst2-english", multi_target_strategy="multi-output")
61
 
62
- test_dataset = load_dataset("dair-ai/emotion", "split", split="test")
63
- print(test_dataset)
64
- classes = test_dataset.features["label"].names
65
- print(classes)
66
- train_dataset = get_templated_dataset(candidate_labels=classes)
67
- print(train_dataset)
68
- print(train_dataset[0])
 
 
69
 
70
  args = TrainingArguments(
71
  batch_size=32,
72
  num_epochs=1
73
  )
74
 
75
- trainer = Trainer(
76
  model=few_shot_model,
77
  args=args,
78
- train_dataset=train_dataset,
79
- eval_dataset=test_dataset
80
  )
81
  trainer.train()
82
 
 
59
  ort_model = ORTModelForFeatureExtraction.from_pretrained('BAAI/bge-small-en-v1.5', file_name="onnx/model.onnx")
60
  few_shot_model = SetFitModel.from_pretrained("moshew/bge-small-en-v1.5_setfit-sst2-english", multi_target_strategy="multi-output")
61
 
62
+ def get_templated_dataset(candidate_labels, sample_size, template="This sentence is {}"):
63
+ examples = []
64
+ for label in candidate_labels:
65
+ for _ in range(sample_size):
66
+ examples.append({"text": template.format(label), "label": label})
67
+ return examples
68
+
69
+ candidate_labels = ["true", "false"]
70
+ synthetic_dataset = get_templated_dataset(candidate_labels, sample_size=8)
71
 
72
  args = TrainingArguments(
73
  batch_size=32,
74
  num_epochs=1
75
  )
76
 
77
+ trainer = SetFitTrainer(
78
  model=few_shot_model,
79
  args=args,
80
+ train_dataset=train_dataset
 
81
  )
82
  trainer.train()
83