Spaces:
Sleeping
Sleeping
update model name field in config
Browse files
- config.yaml +1 -1
- src/config.yaml +0 -46
config.yaml
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
model:
|
2 |
-
|
3 |
tokenizer_name_or_path: "answerdotai/ModernBERT-base"
|
4 |
max_length: 880 # 256
|
5 |
dropout: 0.1
|
|
|
1 |
model:
|
2 |
+
name_or_path: "voxmenthe/modernbert-imdb-sentiment"
|
3 |
tokenizer_name_or_path: "answerdotai/ModernBERT-base"
|
4 |
max_length: 880 # 256
|
5 |
dropout: 0.1
|
src/config.yaml
DELETED
@@ -1,46 +0,0 @@
|
|
1 |
-
model:
|
2 |
-
name: "voxmenthe/modernbert-imdb-sentiment"
|
3 |
-
loss_function:
|
4 |
-
name: "SentimentWeightedLoss" # Options: "SentimentWeightedLoss", "SentimentFocalLoss"
|
5 |
-
# Parameters for the chosen loss function.
|
6 |
-
# For SentimentFocalLoss, common params are:
|
7 |
-
# gamma_focal: 1.0 # (e.g., 2.0 for standard, -2.0 for reversed, 0 for none)
|
8 |
-
# label_smoothing_epsilon: 0.05 # (e.g., 0.0 to 0.1)
|
9 |
-
# For SentimentWeightedLoss, params is empty:
|
10 |
-
params:
|
11 |
-
gamma_focal: 1.0
|
12 |
-
label_smoothing_epsilon: 0.05
|
13 |
-
output_dir: "checkpoints"
|
14 |
-
max_length: 880 # 256
|
15 |
-
dropout: 0.1
|
16 |
-
# --- Pooling Strategy --- #
|
17 |
-
# Options: "cls", "mean", "cls_mean_concat", "weighted_layer", "cls_weighted_concat"
|
18 |
-
# "cls" uses just the [CLS] token for classification
|
19 |
-
# "mean" uses mean pooling over final hidden states for classification
|
20 |
-
# "cls_mean_concat" uses both [CLS] and mean pooling over final hidden states for classification
|
21 |
-
# "weighted_layer" uses a weighted combination of the final hidden states from the top N layers for classification
|
22 |
-
# "cls_weighted_concat" uses a weighted combination of the final hidden states from the top N layers and the [CLS] token for classification
|
23 |
-
|
24 |
-
pooling_strategy: "mean" # Current default, change as needed
|
25 |
-
|
26 |
-
num_weighted_layers: 6 # Number of top BERT layers to use for 'weighted_layer' strategies (e.g., 1 to 12 for BERT-base)
|
27 |
-
|
28 |
-
data:
|
29 |
-
# No specific data paths needed as we use HF datasets at the moment
|
30 |
-
|
31 |
-
training:
|
32 |
-
epochs: 6
|
33 |
-
batch_size: 16
|
34 |
-
lr: 1e-5 # 1e-5 # 2.0e-5
|
35 |
-
weight_decay_rate: 0.02 # 0.01
|
36 |
-
resume_from_checkpoint: "" # "checkpoints/mean_epoch2_0.9361acc_0.9355f1.pt" # Path to checkpoint file, or empty to not resume
|
37 |
-
|
38 |
-
inference:
|
39 |
-
# Default path, can be overridden
|
40 |
-
model_path: "checkpoints/mean_epoch5_0.9575acc_0.9575f1.pt"
|
41 |
-
# Using the same max_length as training for consistency
|
42 |
-
max_length: 880 # 256
|
43 |
-
|
44 |
-
|
45 |
-
# "answerdotai/ModernBERT-base"
|
46 |
-
# "answerdotai/ModernBERT-large"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|