meg-huggingface committed
Commit b510926 · 1 Parent(s): d9f9788

Editing opening info

Files changed (1)
  1. src/about.py +57 -3
src/about.py CHANGED
@@ -22,11 +22,16 @@ NUM_FEWSHOT = 0 # Change with your few shot
 
 
  # Your leaderboard name
- TITLE = """<h1 align="center" id="space-title">Demo leaderboard</h1>"""
+ TITLE = """<h1 align="center" id="space-title">Toxicity leaderboard</h1>"""
 
  # What does your leaderboard evaluate?
  INTRODUCTION_TEXT = """
- Intro text
+ # How "toxic" is the language that might be generated from an LLM?
+ ## This leaderboard directly addresses this question by applying well-known toxicity evaluation approaches:
+
+ **Toxicity:** Uses Allen AI's [Real Toxicity Prompts](https://huggingface.co/datasets/allenai/real-toxicity-prompts) to generate sentences and Google's [Perspective API](https://www.perspectiveapi.com) to score their toxicity. [[Source](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/realtoxicityprompts)]
+
+ **Synthetic Toxicity:** Uses Microsoft's machine-generated ("synthetic") [dataset for hate speech detection, Toxigen](https://github.com/microsoft/TOXIGEN), and its corresponding classifier to score toxicity. [[Source](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/toxigen)]
  """
 
  # Which evaluations are you running? how can people reproduce what you have?
@@ -69,5 +74,54 @@ If everything is done, check you can launch the EleutherAIHarness on your model
  """
 
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
- CITATION_BUTTON_TEXT = r"""
+ CITATION_BUTTON_TEXT = r"""@misc{toxicity-leaderboard,
+ author = {Margaret Mitchell and Clémentine Fourrier},
+ title = {Toxicity Leaderboard},
+ year = {2024},
+ publisher = {Hugging Face},
+ howpublished = "\url{https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard}",
+ }
+
+ @software{eval-harness,
+ author = {Gao, Leo and
+ Tow, Jonathan and
+ Biderman, Stella and
+ Black, Sid and
+ DiPofi, Anthony and
+ Foster, Charles and
+ Golding, Laurence and
+ Hsu, Jeffrey and
+ McDonell, Kyle and
+ Muennighoff, Niklas and
+ Phang, Jason and
+ Reynolds, Laria and
+ Tang, Eric and
+ Thite, Anish and
+ Wang, Ben and
+ Wang, Kevin and
+ Zou, Andy},
+ title = {A framework for few-shot language model evaluation},
+ month = sep,
+ year = 2021,
+ publisher = {Zenodo},
+ version = {v0.0.1},
+ doi = {10.5281/zenodo.5371628},
+ url = {https://doi.org/10.5281/zenodo.5371628},
+ }
+
+ @article{gehman2020realtoxicityprompts,
+ title = {RealToxicityPrompts: Evaluating neural toxic degeneration in language models},
+ author = {Gehman, Samuel and Gururangan, Suchin and Sap, Maarten and Choi, Yejin and Smith, Noah A},
+ journal = {arXiv preprint arXiv:2009.11462},
+ year = {2020}
+ }
+
+ @inproceedings{hartvigsen2022toxigen,
+ title = "{T}oxi{G}en: A Large-Scale Machine-Generated Dataset for Adversarial and Implicit Hate Speech Detection",
+ author = "Hartvigsen, Thomas and Gabriel, Saadia and Palangi, Hamid and Sap, Maarten and Ray, Dipankar and Kamar, Ece",
+ booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics",
+ year = "2022"
+ }
+
+
  """