tymbos committed on
Commit
92bb5cb
·
verified ·
1 Parent(s): 60ccdc1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -17,7 +17,7 @@ DetectorFactory.seed = 0
17
  CHECKPOINT_FILE = "checkpoint.txt"
18
  TOKENIZER_DIR = "tokenizer_model"
19
  TOKENIZER_FILE = os.path.join(TOKENIZER_DIR, "tokenizer.json")
20
- MAX_SAMPLES = 3000000 # Όριο δειγμάτων
21
 
22
  # Παγκόσμια μεταβλητή ελέγχου συλλογής
23
  STOP_COLLECTION = False
@@ -172,7 +172,7 @@ with gr.Blocks() as demo:
172
  dataset_name = gr.Textbox(value="wikimedia/wikipedia", label="Dataset Name")
173
  configs = gr.Textbox(value="20231101.el,20231101.en", label="Configs")
174
  split = gr.Dropdown(choices=["train"], value="train", label="Split")
175
- chunk_size = gr.Slider(500, 10000, value=5000, label="Chunk Size")
176
  vocab_size = gr.Slider(20000, 100000, value=50000, label="Vocabulary Size")
177
  min_freq = gr.Slider(1, 100, value=3, label="Minimum Frequency")
178
  test_text = gr.Textbox(value="Η Ακρόπολη είναι σύμβολο της αρχαίας Ελλάδας.", label="Test Text")
 
17
  CHECKPOINT_FILE = "checkpoint.txt"
18
  TOKENIZER_DIR = "tokenizer_model"
19
  TOKENIZER_FILE = os.path.join(TOKENIZER_DIR, "tokenizer.json")
20
+ MAX_SAMPLES = 50000000 # Όριο δειγμάτων
21
 
22
  # Παγκόσμια μεταβλητή ελέγχου συλλογής
23
  STOP_COLLECTION = False
 
172
  dataset_name = gr.Textbox(value="wikimedia/wikipedia", label="Dataset Name")
173
  configs = gr.Textbox(value="20231101.el,20231101.en", label="Configs")
174
  split = gr.Dropdown(choices=["train"], value="train", label="Split")
175
+ chunk_size = gr.Slider(500, 10000, value=50000, label="Chunk Size")
176
  vocab_size = gr.Slider(20000, 100000, value=50000, label="Vocabulary Size")
177
  min_freq = gr.Slider(1, 100, value=3, label="Minimum Frequency")
178
  test_text = gr.Textbox(value="Η Ακρόπολη είναι σύμβολο της αρχαίας Ελλάδας.", label="Test Text")