voxmenthe committed · Commit 127dca6 · Parent: b976908

update model name field in config

Files changed (2):
  1. config.yaml +1 -1
  2. src/config.yaml +0 -46
config.yaml CHANGED

```diff
@@ -1,5 +1,5 @@
 model:
-  name: "voxmenthe/modernbert-imdb-sentiment"
+  name_or_path: "voxmenthe/modernbert-imdb-sentiment"
   tokenizer_name_or_path: "answerdotai/ModernBERT-base"
   max_length: 880 # 256
   dropout: 0.1
```
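The rename aligns the config key with the `name_or_path` convention used throughout Hugging Face transformers, where the value may be either a Hub repo id (as here) or a local directory. Below is a minimal sketch of how a consumer of this config might resolve the renamed field, assuming PyYAML and transformers; the loader shown is illustrative, not this repo's actual code:

```python
# Illustrative sketch (assumed, not this repo's actual loader): read
# config.yaml and build the model/tokenizer from the renamed field.
import yaml
from transformers import AutoModelForSequenceClassification, AutoTokenizer

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

model_cfg = cfg["model"]
# `name_or_path` accepts a Hub repo id or a local checkpoint directory.
model = AutoModelForSequenceClassification.from_pretrained(model_cfg["name_or_path"])
tokenizer = AutoTokenizer.from_pretrained(model_cfg["tokenizer_name_or_path"])
```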
src/config.yaml DELETED

```diff
@@ -1,46 +0,0 @@
-model:
-  name: "voxmenthe/modernbert-imdb-sentiment"
-  loss_function:
-    name: "SentimentWeightedLoss" # Options: "SentimentWeightedLoss", "SentimentFocalLoss"
-    # Parameters for the chosen loss function.
-    # For SentimentFocalLoss, common params are:
-    # gamma_focal: 1.0 # (e.g., 2.0 for standard, -2.0 for reversed, 0 for none)
-    # label_smoothing_epsilon: 0.05 # (e.g., 0.0 to 0.1)
-    # For SentimentWeightedLoss, params is empty:
-    params:
-      gamma_focal: 1.0
-      label_smoothing_epsilon: 0.05
-  output_dir: "checkpoints"
-  max_length: 880 # 256
-  dropout: 0.1
-  # --- Pooling Strategy --- #
-  # Options: "cls", "mean", "cls_mean_concat", "weighted_layer", "cls_weighted_concat"
-  # "cls" uses just the [CLS] token for classification
-  # "mean" uses mean pooling over the final hidden states
-  # "cls_mean_concat" concatenates the [CLS] token with the mean-pooled final hidden states
-  # "weighted_layer" uses a weighted combination of the final hidden states from the top N layers
-  # "cls_weighted_concat" concatenates that weighted combination with the [CLS] token
-
-  pooling_strategy: "mean" # Current default, change as needed
-
-  num_weighted_layers: 6 # Number of top BERT layers to use for 'weighted_layer' strategies (e.g., 1 to 12 for BERT-base)
-
-data:
-  # No specific data paths needed, as we use HF datasets at the moment
-
-training:
-  epochs: 6
-  batch_size: 16
-  lr: 1e-5 # 1e-5 # 2.0e-5
-  weight_decay_rate: 0.02 # 0.01
-  resume_from_checkpoint: "" # e.g., "checkpoints/mean_epoch2_0.9361acc_0.9355f1.pt"; empty to not resume
-
-inference:
-  # Default path; can be overridden
-  model_path: "checkpoints/mean_epoch5_0.9575acc_0.9575f1.pt"
-  # Use the same max_length as training for consistency
-  max_length: 880 # 256
-
-
-# "answerdotai/ModernBERT-base"
-# "answerdotai/ModernBERT-large"
```