danielle2003 committed on
Commit
24cc0f4
·
1 Parent(s): c178054
Files changed (1) hide show
  1. scripts/train.py +3 -1
scripts/train.py CHANGED
@@ -3,7 +3,8 @@ from datasets import load_dataset
3
 
4
  # Load dataset (French dataset example: Allociné)
5
  dataset = load_dataset("allocine")
6
-
 
7
  # Load tokenizer
8
  model_name = "distilbert-base-multilingual-cased"
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -38,6 +39,7 @@ trainer = Trainer(
38
  eval_dataset=dataset["test"],
39
  )
40
 
 
41
  # Train model
42
  trainer.train()
43
 
 
3
 
4
  # Load dataset (French dataset example: Allociné)
5
  dataset = load_dataset("allocine")
6
+ dataset["train"] = dataset["train"].select(range(10)) # Train on the first 10 samples
7
+ dataset["test"] = dataset["test"].select(range(5)) # Test on the first 5 samples
8
  # Load tokenizer
9
  model_name = "distilbert-base-multilingual-cased"
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
39
  eval_dataset=dataset["test"],
40
  )
41
 
42
+
43
  # Train model
44
  trainer.train()
45