alozowski HF Staff committed on
Commit
54fa655
·
1 Parent(s): 8002416

Add documentation

Browse files
Files changed (1) hide show
  1. yourbench_space/app.py +124 -116
yourbench_space/app.py CHANGED
@@ -30,8 +30,7 @@ project_description = """
30
  **Dynamic Benchmark Generation for Language Models**
31
 
32
  Quickly create zero-shot benchmarks from your documents – keeping models accurate and adaptable
33
- - 📖 [FAQ](#)
34
- - 💻 [GitHub](https://github.com/huggingface/yourbench)
35
  """
36
 
37
  logger.remove()
@@ -249,138 +248,147 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
249
  gr.Markdown(project_description)
250
 
251
  with gr.Tabs() as tabs:
252
- with gr.Tab("Setup", id=0):
253
- with gr.Row():
254
- with gr.Accordion("Hugging Face Settings"):
255
- login_btn = gr.LoginButton()
256
- hf_org_dropdown = gr.Dropdown(choices=[], label="Organization", allow_custom_value=True)
257
- app.load(update_hf_org_dropdown, inputs=None, outputs=hf_org_dropdown)
258
-
259
- hf_dataset_name = gr.Textbox(
260
- label="Dataset name",
261
- value="yourbench",
262
- info="Name of your new evaluation dataset",
263
- )
264
 
265
- with gr.Accordion("Upload Files"):
266
- file_input = gr.File(
267
- label="Upload text files",
268
- file_count="multiple",
269
- file_types=[".txt", ".md", ".html", ".pdf"],
270
- )
271
- output = gr.Textbox(label="Log")
272
- file_input.upload(
273
- save_files,
274
- inputs=[session_state, file_input],
275
- outputs=output,
276
- )
277
- delete_button = gr.Button("Delete Uploaded Files", visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
279
- preview_button = gr.Button("Generate New Config", interactive=False)
280
- log_message = gr.Textbox(label="Log Message", visible=True)
281
- download_button = gr.File(label="Download Config", visible=False, interactive=False)
282
 
283
- file_input.change(
284
- lambda files: gr.update(visible=bool(files)),
285
- inputs=file_input,
286
- outputs=delete_button,
287
- )
 
 
288
 
289
- file_input.change(enable_button, inputs=file_input, outputs=preview_button)
 
 
 
 
290
 
291
- def clean_and_confirm(uid):
292
- MANAGERS.clean_workdir(uid)
293
- return (
294
- "🗑️ All uploaded files have been deleted!",
295
- gr.update(value=None),
296
- gr.update(interactive=False),
 
 
 
297
  )
298
 
299
- delete_button.click(
300
- clean_and_confirm,
301
- inputs=session_state,
302
- outputs=[output, file_input, preview_button],
303
- )
304
 
305
- preview_button.click(
306
- generate_and_return,
307
- inputs=[hf_org_dropdown, hf_dataset_name, session_state],
308
- outputs=[log_message, download_button],
309
- )
310
- preview_button.click(
311
- switch_to_run_generation_tab,
312
- inputs=None,
313
- outputs=tabs,
314
- )
315
 
316
- with gr.Tab("Run Generation", id=1):
317
- with gr.Row():
318
- start_button = gr.Button("Start Task")
319
- stop_button = gr.Button("Stop Task")
320
- kill_button = gr.Button("Kill Task")
321
-
322
- start_button.click(prepare_task, inputs=[session_state, login_btn, hf_dataset_name])
323
- stop_button.click(MANAGERS.stop_process, inputs=session_state)
324
- kill_button.click(MANAGERS.kill_process, inputs=session_state)
325
-
326
- process_status = gr.Checkbox(label="Process Status", interactive=False)
327
- status_timer = gr.Timer(2.0, active=True)
328
- status_timer.tick(update_process_status, inputs=session_state, outputs=process_status)
329
-
330
- with gr.Row():
331
- with gr.Accordion("Stages", open=True):
332
- stages_table = gr.CheckboxGroup(
333
- choices=map_stage_names(STAGES),
334
- value=[],
335
- label="Pipeline Stages Completed",
336
- container=False,
337
- interactive=False,
338
- )
339
 
340
- with gr.Row():
341
- with gr.Column():
342
- with gr.Accordion("Log Output", open=True):
343
- log_output = gr.Code(language=None, lines=20, interactive=False)
344
 
345
- with gr.Column():
346
- with gr.Accordion("Ingestion Preview"):
347
- ingestion_df = gr.DataFrame()
 
 
 
 
 
 
348
 
349
- with gr.Accordion("Summarization Preview"):
350
- summarization_df = gr.DataFrame()
 
 
351
 
352
- with gr.Accordion("Single Shot Preview"):
353
- single_shot_df = gr.DataFrame()
 
354
 
355
- with gr.Accordion("Multi Hop Preview"):
356
- multi_hop_df = gr.DataFrame()
357
 
358
- with gr.Accordion("Lighteval Preview"):
359
- lighteval_df = gr.DataFrame()
360
- stages_table.change(
361
- update_dataset,
362
- inputs=[stages_table, hf_org_dropdown, hf_dataset_name],
363
- outputs=[ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df],
364
- )
365
 
366
- stages_table.change(
367
- on_generation_succsess,
368
- inputs=stages_table,
369
- outputs=[tabs,btn_launch_evals],
370
- )
371
-
372
- # TODO: this timer should only be active when the second tab is passed to active for the first time
373
- log_timer = gr.Timer(1.0, active=True)
374
- log_timer.tick(
375
- MANAGERS.read_and_get_output,
376
- inputs=session_state,
377
- outputs=[log_output, stages_table],
378
- )
 
 
 
 
 
 
 
 
 
 
 
379
 
380
- with gr.Tab("Evaluate", id=2):
381
  with gr.Column():
382
- gr.Markdown("### 🧪 Run YourBench Evaluation")
383
- gr.Markdown("Run the full evaluation pipeline on the uploaded dataset. This includes computing metrics, creating the leaderboard, and pushing results.")
384
 
385
  with gr.Row():
386
  with gr.Column():
 
30
  **Dynamic Benchmark Generation for Language Models**
31
 
32
  Quickly create zero-shot benchmarks from your documents – keeping models accurate and adaptable
33
+ - 💻 [Yourbench GitHub](https://github.com/huggingface/yourbench)
 
34
  """
35
 
36
  logger.remove()
 
248
  gr.Markdown(project_description)
249
 
250
  with gr.Tabs() as tabs:
251
+ with gr.Tab("Choose Documents & Settings", id=0):
252
+ with gr.Column():
253
+ gr.Markdown("### 📄 Choose your documents and settings")
254
+ gr.Markdown("Upload your source documents that will form the knowledge base for your benchmark. Set a Hugging Face organization and dataset name.")
255
+ gr.Markdown("This step also generates a config file for running the benchmark pipeline. You can download it to run YourBench locally.")
 
 
 
 
 
 
 
256
 
257
+ with gr.Row():
258
+ with gr.Accordion("Hugging Face Settings"):
259
+ login_btn = gr.LoginButton()
260
+ hf_org_dropdown = gr.Dropdown(choices=[], label="Organization", allow_custom_value=True)
261
+ app.load(update_hf_org_dropdown, inputs=None, outputs=hf_org_dropdown)
262
+
263
+ hf_dataset_name = gr.Textbox(
264
+ label="Dataset name",
265
+ value="yourbench",
266
+ info="Name of your new evaluation dataset",
267
+ )
268
+
269
+ with gr.Accordion("Upload Files"):
270
+ file_input = gr.File(
271
+ label="Upload text files",
272
+ file_count="multiple",
273
+ file_types=[".txt", ".md", ".html", ".pdf"],
274
+ )
275
+ output = gr.Textbox(label="Log")
276
+ file_input.upload(
277
+ save_files,
278
+ inputs=[session_state, file_input],
279
+ outputs=output,
280
+ )
281
+ delete_button = gr.Button("Delete Uploaded Files", visible=False)
282
+
283
+ preview_button = gr.Button("Generate New Config", interactive=False)
284
+ log_message = gr.Textbox(label="Log Message", visible=True)
285
+ download_button = gr.File(label="Download Config", visible=False, interactive=False)
286
+
287
+ file_input.change(
288
+ lambda files: gr.update(visible=bool(files)),
289
+ inputs=file_input,
290
+ outputs=delete_button,
291
+ )
292
 
293
+ file_input.change(enable_button, inputs=file_input, outputs=preview_button)
 
 
294
 
295
+ def clean_and_confirm(uid):
296
+ MANAGERS.clean_workdir(uid)
297
+ return (
298
+ "🗑️ All uploaded files have been deleted!",
299
+ gr.update(value=None),
300
+ gr.update(interactive=False),
301
+ )
302
 
303
+ delete_button.click(
304
+ clean_and_confirm,
305
+ inputs=session_state,
306
+ outputs=[output, file_input, preview_button],
307
+ )
308
 
309
+ preview_button.click(
310
+ generate_and_return,
311
+ inputs=[hf_org_dropdown, hf_dataset_name, session_state],
312
+ outputs=[log_message, download_button],
313
+ )
314
+ preview_button.click(
315
+ switch_to_run_generation_tab,
316
+ inputs=None,
317
+ outputs=tabs,
318
  )
319
 
320
+ with gr.Tab("Run Benchmark Pipeline", id=1):
321
+ with gr.Column():
322
+ gr.Markdown("### ⚙️ Run the benchmark generation pipeline")
323
+ gr.Markdown("Start the pipeline to process documents, generate questions, and build the private evaluation dataset. Watch logs, track progress, and preview the results.")
 
324
 
325
+ with gr.Row():
326
+ start_button = gr.Button("Start Task")
327
+ stop_button = gr.Button("Stop Task")
328
+ kill_button = gr.Button("Kill Task")
 
 
 
 
 
 
329
 
330
+ start_button.click(prepare_task, inputs=[session_state, login_btn, hf_dataset_name])
331
+ stop_button.click(MANAGERS.stop_process, inputs=session_state)
332
+ kill_button.click(MANAGERS.kill_process, inputs=session_state)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
 
334
+ process_status = gr.Checkbox(label="Process Status", interactive=False)
335
+ status_timer = gr.Timer(2.0, active=True)
336
+ status_timer.tick(update_process_status, inputs=session_state, outputs=process_status)
 
337
 
338
+ with gr.Row():
339
+ with gr.Accordion("Stages", open=True):
340
+ stages_table = gr.CheckboxGroup(
341
+ choices=map_stage_names(STAGES),
342
+ value=[],
343
+ label="Pipeline Stages Completed",
344
+ container=False,
345
+ interactive=False,
346
+ )
347
 
348
+ with gr.Row():
349
+ with gr.Column():
350
+ with gr.Accordion("Log Output", open=True):
351
+ log_output = gr.Code(language=None, lines=20, interactive=False)
352
 
353
+ with gr.Column():
354
+ with gr.Accordion("Ingestion Preview"):
355
+ ingestion_df = gr.DataFrame()
356
 
357
+ with gr.Accordion("Summarization Preview"):
358
+ summarization_df = gr.DataFrame()
359
 
360
+ with gr.Accordion("Single Shot Preview"):
361
+ single_shot_df = gr.DataFrame()
 
 
 
 
 
362
 
363
+ with gr.Accordion("Multi Hop Preview"):
364
+ multi_hop_df = gr.DataFrame()
365
+
366
+ with gr.Accordion("Lighteval Preview"):
367
+ lighteval_df = gr.DataFrame()
368
+ stages_table.change(
369
+ update_dataset,
370
+ inputs=[stages_table, hf_org_dropdown, hf_dataset_name],
371
+ outputs=[ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df],
372
+ )
373
+
374
+ stages_table.change(
375
+ on_generation_succsess,
376
+ inputs=stages_table,
377
+ outputs=[tabs,btn_launch_evals],
378
+ )
379
+
380
+ # TODO: this timer should only be active when the second tab is passed to active for the first time
381
+ log_timer = gr.Timer(1.0, active=True)
382
+ log_timer.tick(
383
+ MANAGERS.read_and_get_output,
384
+ inputs=session_state,
385
+ outputs=[log_output, stages_table],
386
+ )
387
 
388
+ with gr.Tab("Evaluate Models on Benchmark", id=2):
389
  with gr.Column():
390
+ gr.Markdown("### 🧪 Evaluate models on your benchmark")
391
+ gr.Markdown("Runs the evaluation with [Lighteval](https://github.com/huggingface/lighteval) on the resulted dataset using 5+ open models, then deploys a leaderboard as a Hugging Face Space under your org.")
392
 
393
  with gr.Row():
394
  with gr.Column():