Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -367,7 +367,7 @@ def evaluate_audio(gt_file, pred_file, metric_config, include_timestamps=False):
|
|
367 |
# Format results as DataFrame
|
368 |
if results:
|
369 |
results_df = pd.DataFrame([results])
|
370 |
-
return results_df, json.dumps(results, indent=2)
|
371 |
else:
|
372 |
return None, "Evaluation completed but no results were generated."
|
373 |
else:
|
@@ -501,7 +501,7 @@ def create_gradio_demo():
|
|
501 |
VERSA is a toolkit dedicated to collecting evaluation metrics in speech and audio quality.
|
502 |
It provides a comprehensive connection to cutting-edge evaluation techniques and is tightly integrated with ESPnet.
|
503 |
|
504 |
-
With full installation, VERSA offers over
|
505 |
These metrics encompass evaluations utilizing diverse external resources, including matching and non-matching
|
506 |
reference audio, text transcriptions, and text captions.
|
507 |
|
@@ -516,14 +516,22 @@ def create_gradio_demo():
|
|
516 |
### Citation
|
517 |
|
518 |
```
|
519 |
-
@
|
520 |
-
|
521 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
522 |
year={2024},
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
url={https://arxiv.org/abs/2412.17667},
|
527 |
}
|
528 |
```
|
529 |
|
|
|
367 |
# Format results as DataFrame
|
368 |
if results:
|
369 |
results_df = pd.DataFrame([results])
|
370 |
+
return results_df.T, json.dumps(results, indent=2)
|
371 |
else:
|
372 |
return None, "Evaluation completed but no results were generated."
|
373 |
else:
|
|
|
501 |
VERSA is a toolkit dedicated to collecting evaluation metrics in speech and audio quality.
|
502 |
It provides a comprehensive connection to cutting-edge evaluation techniques and is tightly integrated with ESPnet.
|
503 |
|
504 |
+
With full installation, VERSA offers over 80 metrics with 700+ metric variations based on different configurations.
|
505 |
These metrics encompass evaluations utilizing diverse external resources, including matching and non-matching
|
506 |
reference audio, text transcriptions, and text captions.
|
507 |
|
|
|
516 |
### Citation
|
517 |
|
518 |
```
|
519 |
+
@inproceedings{shi2025versa,
|
520 |
+
title={{VERSA}: A Versatile Evaluation Toolkit for Speech, Audio, and Music},
|
521 |
+
author={Jiatong Shi and Hye-jin Shim and Jinchuan Tian and Siddhant Arora and Haibin Wu and Darius Petermann and Jia Qi Yip and You Zhang and Yuxun Tang and Wangyou Zhang and Dareen Safar Alharthi and Yichen Huang and Koichi Saito and Jionghao Han and Yiwen Zhao and Chris Donahue and Shinji Watanabe},
|
522 |
+
booktitle={2025 Annual Conference of the North American Chapter of the Association for Computational Linguistics -- System Demonstration Track},
|
523 |
+
year={2025},
|
524 |
+
url={https://openreview.net/forum?id=zU0hmbnyQm}
|
525 |
+
}
|
526 |
+
|
527 |
+
@inproceedings{shi2024espnetcodec,
|
528 |
+
author={Shi, Jiatong and Tian, Jinchuan and Wu, Yihan and Jung, Jee-Weon and Yip, Jia Qi and Masuyama, Yoshiki and Chen, William and Wu, Yuning and Tang, Yuxun and Baali, Massa and Alharthi, Dareen and Zhang, Dong and Deng, Ruifan and Srivastava, Tejes and Wu, Haibin and Liu, Alexander and Raj, Bhiksha and Jin, Qin and Song, Ruihua and Watanabe, Shinji},
|
529 |
+
booktitle={2024 IEEE Spoken Language Technology Workshop (SLT)},
|
530 |
+
title={ESPnet-Codec: Comprehensive Training and Evaluation of Neural Codecs For Audio, Music, and Speech},
|
531 |
year={2024},
|
532 |
+
pages={562--569},
|
533 |
+
keywords={Training;Measurement;Codecs;Speech coding;Conferences;Focusing;Neural codecs;codec evaluation},
|
534 |
+
doi={10.1109/SLT61566.2024.10832289}
|
|
|
535 |
}
|
536 |
```
|
537 |
|