ftshijt committed on
Commit
37f33b1
·
verified ·
1 Parent(s): 3e8c109

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -9
app.py CHANGED
@@ -367,7 +367,7 @@ def evaluate_audio(gt_file, pred_file, metric_config, include_timestamps=False):
367
  # Format results as DataFrame
368
  if results:
369
  results_df = pd.DataFrame([results])
370
- return results_df, json.dumps(results, indent=2)
371
  else:
372
  return None, "Evaluation completed but no results were generated."
373
  else:
@@ -501,7 +501,7 @@ def create_gradio_demo():
501
  VERSA is a toolkit dedicated to collecting evaluation metrics in speech and audio quality.
502
  It provides a comprehensive connection to cutting-edge evaluation techniques and is tightly integrated with ESPnet.
503
 
504
- With full installation, VERSA offers over 60 metrics with 700+ metric variations based on different configurations.
505
  These metrics encompass evaluations utilizing diverse external resources, including matching and non-matching
506
  reference audio, text transcriptions, and text captions.
507
 
@@ -516,14 +516,22 @@ def create_gradio_demo():
516
  ### Citation
517
 
518
  ```
519
- @misc{shi2024versaversatileevaluationtoolkit,
520
- title={VERSA: A Versatile Evaluation Toolkit for Speech, Audio, and Music},
521
- author={Jiatong Shi and Hye-jin Shim and Jinchuan Tian and Siddhant Arora and Haibin Wu and Darius Petermann and Jia Qi Yip and You Zhang and Yuxun Tang and Wangyou Zhang and Dareen Safar Alharthi and Yichen Huang and Koichi Saito and Jionghao Han and Yiwen Zhao and Chris Donahue and Shinji Watanabe},
 
 
 
 
 
 
 
 
 
522
  year={2024},
523
- eprint={2412.17667},
524
- archivePrefix={arXiv},
525
- primaryClass={cs.SD},
526
- url={https://arxiv.org/abs/2412.17667},
527
  }
528
  ```
529
 
 
367
  # Format results as DataFrame
368
  if results:
369
  results_df = pd.DataFrame([results])
370
+ return results_df.T, json.dumps(results, indent=2)
371
  else:
372
  return None, "Evaluation completed but no results were generated."
373
  else:
 
501
  VERSA is a toolkit dedicated to collecting evaluation metrics in speech and audio quality.
502
  It provides a comprehensive connection to cutting-edge evaluation techniques and is tightly integrated with ESPnet.
503
 
504
+ With full installation, VERSA offers over 80 metrics with 700+ metric variations based on different configurations.
505
  These metrics encompass evaluations utilizing diverse external resources, including matching and non-matching
506
  reference audio, text transcriptions, and text captions.
507
 
 
516
  ### Citation
517
 
518
  ```
519
+ @inproceedings{shi2025versa,
520
+ title={{VERSA}: A Versatile Evaluation Toolkit for Speech, Audio, and Music},
521
+ author={Jiatong Shi and Hye-jin Shim and Jinchuan Tian and Siddhant Arora and Haibin Wu and Darius Petermann and Jia Qi Yip and You Zhang and Yuxun Tang and Wangyou Zhang and Dareen Safar Alharthi and Yichen Huang and Koichi Saito and Jionghao Han and Yiwen Zhao and Chris Donahue and Shinji Watanabe},
522
+ booktitle={2025 Annual Conference of the North American Chapter of the Association for Computational Linguistics -- System Demonstration Track},
523
+ year={2025},
524
+ url={https://openreview.net/forum?id=zU0hmbnyQm}
525
+ }
526
+
527
+ @inproceedings{shi2024versaversatileevaluationtoolkit,
528
+ author={Shi, Jiatong and Tian, Jinchuan and Wu, Yihan and Jung, Jee-Weon and Yip, Jia Qi and Masuyama, Yoshiki and Chen, William and Wu, Yuning and Tang, Yuxun and Baali, Massa and Alharthi, Dareen and Zhang, Dong and Deng, Ruifan and Srivastava, Tejes and Wu, Haibin and Liu, Alexander and Raj, Bhiksha and Jin, Qin and Song, Ruihua and Watanabe, Shinji},
529
+ booktitle={2024 IEEE Spoken Language Technology Workshop (SLT)},
530
+ title={ESPnet-Codec: Comprehensive Training and Evaluation of Neural Codecs For Audio, Music, and Speech},
531
  year={2024},
532
+ pages={562-569},
533
+ keywords={Training;Measurement;Codecs;Speech coding;Conferences;Focusing;Neural codecs;codec evaluation},
534
+ doi={10.1109/SLT61566.2024.10832289}
 
535
  }
536
  ```
537