Update app.py
app.py
CHANGED
@@ -17,7 +17,7 @@ DetectorFactory.seed = 0
 CHECKPOINT_FILE = "checkpoint.txt"
 TOKENIZER_DIR = "tokenizer_model"
 TOKENIZER_FILE = os.path.join(TOKENIZER_DIR, "tokenizer.json")
-MAX_SAMPLES =
+MAX_SAMPLES = 50000000  # Sample limit
 
 # Global collection control variable
 STOP_COLLECTION = False
@@ -172,7 +172,7 @@ with gr.Blocks() as demo:
 dataset_name = gr.Textbox(value="wikimedia/wikipedia", label="Dataset Name")
 configs = gr.Textbox(value="20231101.el,20231101.en", label="Configs")
 split = gr.Dropdown(choices=["train"], value="train", label="Split")
-chunk_size = gr.Slider(500, 10000, value=
+chunk_size = gr.Slider(500, 10000, value=50000, label="Chunk Size")
 vocab_size = gr.Slider(20000, 100000, value=50000, label="Vocabulary Size")
 min_freq = gr.Slider(1, 100, value=3, label="Minimum Frequency")
 test_text = gr.Textbox(value="Η Ακρόπολη είναι σύμβολο της αρχαίας Ελλάδας.", label="Test Text")
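For context, a minimal sketch of how the pieces touched by this commit could fit together. This is illustrative only and not the repository's actual app.py: the function name `collect_texts`, its signature, and the `sample["text"]` field access are assumptions; the constants and default values mirror the diff above.

```python
# Illustrative sketch only — not the app.py implementation.
# Shows how MAX_SAMPLES and STOP_COLLECTION could bound a streaming pass
# over the configs entered in the Gradio UI ("20231101.el,20231101.en").
from datasets import load_dataset

CHECKPOINT_FILE = "checkpoint.txt"
MAX_SAMPLES = 50000000   # sample limit, as set in this commit
STOP_COLLECTION = False  # assumed to be flipped by a stop control in the UI


def collect_texts(dataset_name: str, configs: str, split: str):
    """Yield article texts until MAX_SAMPLES is reached or collection is stopped.

    `configs` is the comma-separated string from the Configs textbox.
    """
    collected = 0
    for config in configs.split(","):
        # streaming=True iterates the dump without downloading it in full
        stream = load_dataset(dataset_name, config.strip(),
                              split=split, streaming=True)
        for sample in stream:
            if STOP_COLLECTION or collected >= MAX_SAMPLES:
                return
            yield sample["text"]  # wikimedia/wikipedia rows carry a "text" field
            collected += 1
```

With the values from this commit, such a loop would stop after at most 50,000,000 articles; in the app itself, `dataset_name`, `configs`, and `split` would presumably come straight from the Gradio textboxes shown in the diff.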