Riddhi Bhagwat committed on
Commit
15efe4a
·
1 Parent(s): afda8d0

leaderboard updates

Browse files
Files changed (2) hide show
  1. app/app.py +159 -67
  2. app/leadboard_config.py +2 -2
app/app.py CHANGED
@@ -24,6 +24,7 @@ from pandas import DataFrame
24
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
25
  import threading
26
  from collections import defaultdict
 
27
 
28
 
29
  BASE_MODEL = os.getenv("MODEL", "google/gemma-3-12b-pt")
@@ -38,6 +39,11 @@ TEXT_ONLY = (
38
  else False
39
  )
40
 
 
 
 
 
 
41
 
42
  def create_inference_client(
43
  model: Optional[str] = None, base_url: Optional[str] = None
@@ -136,6 +142,49 @@ def load_languages() -> dict[str, str]:
136
 
137
  LANGUAGES = load_languages()
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  USER_AGREEMENT = """
140
  You have been asked to participate in a research study conducted by Lingo Lab from the Computer Science and Artificial Intelligence Laboratory at the Massachusetts Institute of Technology (M.I.T.), together with huggingface.
141
 
@@ -159,9 +208,6 @@ def add_user_message(history, message):
159
 
160
 
161
  def format_system_message(language: str):
162
- # Use a higher temperature with randomization for more diversity
163
- random_temp = random.uniform(1.3, 2.0) # More random between 1.3 and 2.0
164
-
165
  system_message = [
166
  {
167
  "role": "system",
@@ -172,8 +218,7 @@ def format_system_message(language: str):
172
  "content": f"Start by asking me a question in {language}."
173
  }
174
  ]
175
- response = call_pipeline(system_message, temperature=random_temp)
176
-
177
  new_system_message = [
178
  {
179
  "role": "system",
@@ -287,10 +332,8 @@ def add_fake_like_data(
287
 
288
 
289
  @spaces.GPU
290
- def call_pipeline(messages: list, temperature: float = 0.7):
291
  """Call the appropriate model pipeline based on configuration"""
292
-
293
-
294
  if ZERO_GPU:
295
  tokenizer = CLIENT["tokenizer"]
296
  # Ensure messages follow the proper alternating pattern
@@ -334,9 +377,8 @@ def call_pipeline(messages: list, temperature: float = 0.7):
334
  clean_up_tokenization_spaces=False,
335
  max_length=2000,
336
  return_full_text=False,
337
- temperature=temperature,
338
  do_sample=True,
339
- top_p=0.9, # Add top_p sampling for more diversity
340
  )
341
 
342
  return response[0]["generated_text"]
@@ -345,7 +387,6 @@ def call_pipeline(messages: list, temperature: float = 0.7):
345
  messages,
346
  clean_up_tokenization_spaces=False,
347
  max_length=2000,
348
- temperature=temperature,
349
  )
350
  return response[0]["generated_text"][-1]["content"]
351
 
@@ -361,18 +402,15 @@ def respond(
361
  Return the history with the new message"""
362
  messages = format_history_as_messages(history)
363
 
364
- # Use provided temperature or default to 0.7
365
- temp = temperature if temperature is not None else 0.7
366
-
367
  if ZERO_GPU:
368
- content = call_pipeline(messages, temperature=temp)
369
  else:
370
  response = CLIENT.chat.completions.create(
371
  messages=messages,
372
  max_tokens=2000,
373
  stream=False,
374
  seed=seed,
375
- temperature=temp,
376
  )
377
  content = response.choices[0].message.content
378
 
@@ -416,7 +454,7 @@ def wrangle_like_data(x: gr.LikeData, history) -> DataFrame:
416
  message["metadata"] = {}
417
  elif not isinstance(message["metadata"], dict):
418
  message["metadata"] = message["metadata"].__dict__
419
-
420
  rating = message["metadata"].get("title")
421
  if rating == "liked":
422
  message["rating"] = 1
@@ -529,21 +567,17 @@ def wrangle_retry_data(
529
  language=language,
530
  )
531
 
532
- # Use randomized temperature for more varied responses when retrying
533
- random_temp = random.randint(70, 150) / 100 # Between 0.7 and 1.5
534
- random_seed = random.randint(0, 1000000)
535
-
536
  # Return the history without a new message
537
  history = respond(
538
  history=history[:-1],
539
  language=language,
540
- temperature=random_temp,
541
- seed=random_seed,
542
  )
543
  return history, update_dataframe(dataframe, history)
544
 
545
  # Global variables for tracking language data points
546
- LANGUAGE_DATA_POINTS = defaultdict(int)
547
  language_data_lock = threading.Lock()
548
 
549
  def get_leaderboard_data():
@@ -568,7 +602,7 @@ def set_language_data_points(language, count):
568
  def load_initial_language_data():
569
  """Load initial language data points from persistent storage or default values"""
570
  data_points_path, use_persistent = get_persistent_storage_path("language_data_points.json")
571
-
572
  if data_points_path.exists():
573
  try:
574
  with open(data_points_path, "r", encoding="utf-8") as f:
@@ -578,17 +612,17 @@ def load_initial_language_data():
578
  LANGUAGE_DATA_POINTS.update(data)
579
  except Exception as e:
580
  print(f"Error loading language data points: {e}")
581
-
582
  for lang in LANGUAGES.keys():
583
  if lang not in LANGUAGE_DATA_POINTS:
584
  LANGUAGE_DATA_POINTS[lang] = 0
585
-
586
  return get_leaderboard_data()
587
 
588
  def save_language_data_points():
589
  """Save language data points to persistent storage"""
590
  data_points_path, use_persistent = get_persistent_storage_path("language_data_points.json")
591
-
592
  try:
593
  with language_data_lock:
594
  with open(data_points_path, "w", encoding="utf-8") as f:
@@ -616,7 +650,7 @@ def submit_conversation(dataframe, conversation_id, session_id, language):
616
  save_feedback(input_object=conversation_data)
617
  leaderboard_data = increment_language_data_point(language)
618
  save_language_data_points()
619
-
620
  return (gr.Dataframe(value=None, interactive=False), [], leaderboard_data)
621
 
622
 
@@ -948,12 +982,44 @@ js = '''function js(){
948
  }
949
  }'''
950
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
951
 
952
  with gr.Blocks(css=css, js=js) as demo:
953
  user_consented = gr.State(value=False)
954
  language = gr.State(value="English") # Default language state
955
  leaderboard_data = gr.State([])
956
-
957
  # Main application interface (initially hidden)
958
  with gr.Group() as main_app:
959
  with gr.Row():
@@ -1029,36 +1095,71 @@ with gr.Blocks(css=css, js=js) as demo:
1029
  elem_classes=["add-language-btn"]
1030
  )
1031
 
 
 
1032
  # Right column with leaderboard
1033
  with gr.Column(scale=3, elem_classes=["leaderboard-container"]):
1034
  gr.Markdown("# Language Leaderboard", elem_classes=["leaderboard-title"])
1035
  leaderboard_html = gr.HTML("Loading leaderboard...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1036
 
 
1037
  with gr.Accordion("Admin Controls", open=False, visible=False) as admin_panel:
1038
  with gr.Row():
1039
  admin_language = gr.Dropdown(choices=list(LANGUAGES.keys()), label="Language")
1040
  admin_count = gr.Number(value=0, label="Data Points")
1041
  set_count_btn = gr.Button("Set Count")
1042
-
1043
  # toggle button for admin panel?
1044
  admin_toggle = gr.Button("Admin Controls", visible=True)
 
 
 
 
 
 
1045
 
1046
- # update leaderboard HTML
1047
- def update_leaderboard_html(data):
1048
- if not data:
1049
- return "Loading leaderboard..."
1050
-
1051
- html = "<div class='leaderboard-content'>"
1052
- for idx, (lang, count) in enumerate(data):
1053
- html += f"""
1054
- <div class='leaderboard-item'>
1055
- <span class='leaderboard-rank'>#{idx+1}</span>
1056
- <span class='leaderboard-language'>{lang}</span>
1057
- <span class='leaderboard-count'>{count}</span>
1058
- </div>
1059
- """
1060
- html += "</div>"
1061
- return html
1062
 
1063
 
1064
  # Create a hidden group instead of a modal
@@ -1191,13 +1292,9 @@ with gr.Blocks(css=css, js=js) as demo:
1191
 
1192
  # Update the consent button click handler
1193
  consent_btn.click(
1194
- fn=lambda: True,
1195
- outputs=user_consented,
1196
- js="() => set_cookie('feel_consent', 'true')"
1197
- ).then(
1198
- fn=update_visibility,
1199
- inputs=user_consented,
1200
- outputs=[main_app, consent_overlay, consent_modal, footer_banner, footer_section]
1201
  )
1202
 
1203
  ##############################
@@ -1263,32 +1360,27 @@ with gr.Blocks(css=css, js=js) as demo:
1263
  outputs=[conversation_id],
1264
  )
1265
 
1266
- def initialize_app():
1267
- """Initialize the app with session ID, language, and leaderboard data"""
1268
  global LANGUAGES
1269
  LANGUAGES = load_languages()
1270
  language_choices = list(LANGUAGES.keys())
1271
  default_language = language_choices[0] if language_choices else "English"
1272
 
1273
- # Load initial leaderboard data
1274
- leaderboard = load_initial_language_data()
1275
-
1276
- # Return exactly 3 values as expected
1277
- return str(uuid.uuid4()), default_language, leaderboard
1278
-
1279
  def toggle_admin_panel(visible):
1280
  return gr.Accordion(visible=not visible)
1281
-
1282
  def handle_set_count(language, count):
1283
  updated_data = set_language_data_points(language, int(count))
1284
  save_language_data_points()
1285
  return update_leaderboard_html(updated_data), updated_data
1286
-
1287
  demo.load(
1288
- fn=initialize_app,
1289
  inputs=None,
1290
  outputs=[
1291
- session_id,
1292
  language,
1293
  leaderboard_data
1294
  ]
@@ -1393,7 +1485,7 @@ with gr.Blocks(css=css, js=js) as demo:
1393
  inputs=[admin_panel],
1394
  outputs=[admin_panel]
1395
  )
1396
-
1397
  set_count_btn.click(
1398
  fn=handle_set_count,
1399
  inputs=[admin_language, admin_count],
 
24
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
25
  import threading
26
  from collections import defaultdict
27
+ from datasets import load_dataset
28
 
29
 
30
  BASE_MODEL = os.getenv("MODEL", "google/gemma-3-12b-pt")
 
39
  else False
40
  )
41
 
42
+ # os.environ["HF_DATASETS_CACHE"] = "/data/datasets_cache"
43
+
44
+ # # caches dataset after first download
45
+ # dataset = load_dataset("feel-fl/feel-feedback")
46
+
47
 
48
  def create_inference_client(
49
  model: Optional[str] = None, base_url: Optional[str] = None
 
142
 
143
  LANGUAGES = load_languages()
144
 
145
def update_language_counts_from_dataset():
    """Refresh per-language data point counts from the feedback dataset.

    Reads the counts currently stored in ``language_data_points.json`` (if
    the file exists), overlays the number of rows per ``language`` value found
    in the ``train`` split of ``feel-fl/feel-feedback``, writes the merged
    mapping back to the same file and returns it.

    Any error while loading or processing the dataset is printed and the
    previously stored counts are returned unchanged.

    Returns:
        dict: mapping of language name to data point count.
    """
    data_file, _ = get_persistent_storage_path("language_data_points.json")

    data = {}
    if data_file.exists():
        try:
            with open(data_file, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except json.JSONDecodeError:
            print("error reading data file. Creating new data.")
        else:
            # Only a JSON object can hold per-language counts; anything else
            # (list, number, ...) is treated like a missing file.
            if isinstance(loaded, dict):
                data = loaded

    cache_dir, _ = get_persistent_storage_path("datasets_cache")
    # `datasets` resolves its cache location when it is imported, so setting
    # the variable here does not redirect the cache by itself. It is kept for
    # anything that reads the environment later; the directory is also passed
    # explicitly to load_dataset() below.
    os.environ["HF_DATASETS_CACHE"] = str(cache_dir)

    try:
        # The dataset is cached after the first download.
        # NOTE(review): this may need to change, because we don't want to keep
        # reading an old cached version once the dataset has been updated.
        print("loading dataset from HuggingFace...")
        dataset = load_dataset("feel-fl/feel-feedback", cache_dir=str(cache_dir))
        df = dataset["train"].to_pandas()

        if "language" in df.columns:
            language_counts = df["language"].value_counts().to_dict()
            # Cast to built-in int so json.dump never sees a numpy integer.
            data.update(
                {lang: int(count) for lang, count in language_counts.items()}
            )
            print(f"Updated counts from dataset for {len(language_counts)} languages")
        else:
            print("Warning: No 'language' column found in the dataset.")
            print("Available columns:", df.columns.tolist())
    except Exception as e:
        print(f"Error updating from dataset: {e}")

    # Persisting is best-effort: this function also runs at import time, and an
    # unwritable storage path should not prevent the counts from being used.
    try:
        with open(data_file, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
    except OSError as e:
        print(f"Error saving language data points: {e}")

    return data
187
+
188
  USER_AGREEMENT = """
189
  You have been asked to participate in a research study conducted by Lingo Lab from the Computer Science and Artificial Intelligence Laboratory at the Massachusetts Institute of Technology (M.I.T.), together with huggingface.
190
 
 
208
 
209
 
210
  def format_system_message(language: str):
 
 
 
211
  system_message = [
212
  {
213
  "role": "system",
 
218
  "content": f"Start by asking me a question in {language}."
219
  }
220
  ]
221
+ response = call_pipeline(system_message)
 
222
  new_system_message = [
223
  {
224
  "role": "system",
 
332
 
333
 
334
  @spaces.GPU
335
+ def call_pipeline(messages: list):
336
  """Call the appropriate model pipeline based on configuration"""
 
 
337
  if ZERO_GPU:
338
  tokenizer = CLIENT["tokenizer"]
339
  # Ensure messages follow the proper alternating pattern
 
377
  clean_up_tokenization_spaces=False,
378
  max_length=2000,
379
  return_full_text=False,
380
+ temperature=0.7,
381
  do_sample=True,
 
382
  )
383
 
384
  return response[0]["generated_text"]
 
387
  messages,
388
  clean_up_tokenization_spaces=False,
389
  max_length=2000,
 
390
  )
391
  return response[0]["generated_text"][-1]["content"]
392
 
 
402
  Return the history with the new message"""
403
  messages = format_history_as_messages(history)
404
 
 
 
 
405
  if ZERO_GPU:
406
+ content = call_pipeline(messages)
407
  else:
408
  response = CLIENT.chat.completions.create(
409
  messages=messages,
410
  max_tokens=2000,
411
  stream=False,
412
  seed=seed,
413
+ temperature=temperature,
414
  )
415
  content = response.choices[0].message.content
416
 
 
454
  message["metadata"] = {}
455
  elif not isinstance(message["metadata"], dict):
456
  message["metadata"] = message["metadata"].__dict__
457
+
458
  rating = message["metadata"].get("title")
459
  if rating == "liked":
460
  message["rating"] = 1
 
567
  language=language,
568
  )
569
 
 
 
 
 
570
  # Return the history without a new message
571
  history = respond(
572
  history=history[:-1],
573
  language=language,
574
+ temperature=random.randint(1, 100) / 100,
575
+ seed=random.randint(0, 1000000),
576
  )
577
  return history, update_dataframe(dataframe, history)
578
 
579
  # Global variables for tracking language data points
580
+ LANGUAGE_DATA_POINTS = update_language_counts_from_dataset()
581
  language_data_lock = threading.Lock()
582
 
583
  def get_leaderboard_data():
 
602
  def load_initial_language_data():
603
  """Load initial language data points from persistent storage or default values"""
604
  data_points_path, use_persistent = get_persistent_storage_path("language_data_points.json")
605
+
606
  if data_points_path.exists():
607
  try:
608
  with open(data_points_path, "r", encoding="utf-8") as f:
 
612
  LANGUAGE_DATA_POINTS.update(data)
613
  except Exception as e:
614
  print(f"Error loading language data points: {e}")
615
+
616
  for lang in LANGUAGES.keys():
617
  if lang not in LANGUAGE_DATA_POINTS:
618
  LANGUAGE_DATA_POINTS[lang] = 0
619
+
620
  return get_leaderboard_data()
621
 
622
  def save_language_data_points():
623
  """Save language data points to persistent storage"""
624
  data_points_path, use_persistent = get_persistent_storage_path("language_data_points.json")
625
+
626
  try:
627
  with language_data_lock:
628
  with open(data_points_path, "w", encoding="utf-8") as f:
 
650
  save_feedback(input_object=conversation_data)
651
  leaderboard_data = increment_language_data_point(language)
652
  save_language_data_points()
653
+
654
  return (gr.Dataframe(value=None, interactive=False), [], leaderboard_data)
655
 
656
 
 
982
  }
983
  }'''
984
 
985
def render_leaderboard():
    """Render the language leaderboard as an HTML table.

    Counts are obtained from ``update_language_counts_from_dataset()``. Every
    language in ``LANGUAGES`` is listed (with 0 when it has no stored count),
    ordered by count, highest first.

    Returns:
        str: ``<table class="leaderboard">`` markup with one row per language
        (rank, language, data points).
    """
    # Local import: the name `html` is not guaranteed to be imported at file level.
    from html import escape

    counts = update_language_counts_from_dataset()

    ranked = sorted(
        ((lang, counts.get(lang, 0)) for lang in LANGUAGES),
        key=lambda item: item[1],
        reverse=True,
    )

    parts = [
        '<table class="leaderboard">',
        "<tr>",
        "<th>Rank</th>",
        "<th>Language</th>",
        "<th>Data Points</th>",
        "</tr>",
    ]
    for rank, (lang, count) in enumerate(ranked, start=1):
        # Language names and stored counts are data, not markup: escape them so
        # a name containing `<` or `&` cannot break or inject into the table.
        parts.extend(
            [
                "<tr>",
                f"<td>{rank}</td>",
                f"<td>{escape(str(lang))}</td>",
                f"<td>{escape(str(count))}</td>",
                "</tr>",
            ]
        )
    parts.append("</table>")

    return "\n".join(parts)
1016
+
1017
 
1018
  with gr.Blocks(css=css, js=js) as demo:
1019
  user_consented = gr.State(value=False)
1020
  language = gr.State(value="English") # Default language state
1021
  leaderboard_data = gr.State([])
1022
+
1023
  # Main application interface (initially hidden)
1024
  with gr.Group() as main_app:
1025
  with gr.Row():
 
1095
  elem_classes=["add-language-btn"]
1096
  )
1097
 
1098
+
1099
+
1100
  # Right column with leaderboard
1101
  with gr.Column(scale=3, elem_classes=["leaderboard-container"]):
1102
  gr.Markdown("# Language Leaderboard", elem_classes=["leaderboard-title"])
1103
  leaderboard_html = gr.HTML("Loading leaderboard...")
1104
+ refresh_leaderboard_btn = gr.Button("Refresh Counts from Dataset")
1105
+ leaderboard_html.value = render_leaderboard()
1106
+
1107
+ # HELPERS:
1108
def update_func():
    """Click handler for the refresh button: return fresh leaderboard HTML.

    ``render_leaderboard()`` itself calls
    ``update_language_counts_from_dataset()``, so the counts are refreshed
    exactly once here instead of loading the dataset twice per click.
    """
    return render_leaderboard()
1111
+
1112
+
1113
def set_language_count(language, count):
    """Admin handler: manually set the stored count for one language.

    Args:
        language: language name selected in the admin dropdown; when empty
            nothing is written.
        count: value of the admin number field; when it is ``None`` (field
            cleared) nothing is written.

    Returns:
        str: the re-rendered leaderboard HTML.
    """
    if not language or count is None:
        return render_leaderboard()

    data_file, _ = get_persistent_storage_path("language_data_points.json")

    data = {}
    if data_file.exists():
        try:
            with open(data_file, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except json.JSONDecodeError:
            loaded = None
        # Only a JSON object can be updated per language; start over otherwise.
        if isinstance(loaded, dict):
            data = loaded

    data[language] = int(count)

    with open(data_file, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    # NOTE(review): render_leaderboard() re-reads the dataset and overwrites the
    # count of every language present in it, so a manual value for such a
    # language may not survive this call - confirm this is intended.
    return render_leaderboard()
1134
+
1135
+
1136
+ refresh_leaderboard_btn.click(
1137
+ update_func,
1138
+ outputs=leaderboard_html
1139
+ )
1140
+
1141
+
1142
 
1143
+
1144
  with gr.Accordion("Admin Controls", open=False, visible=False) as admin_panel:
1145
  with gr.Row():
1146
  admin_language = gr.Dropdown(choices=list(LANGUAGES.keys()), label="Language")
1147
  admin_count = gr.Number(value=0, label="Data Points")
1148
  set_count_btn = gr.Button("Set Count")
1149
+
1150
  # toggle button for admin panel?
1151
  admin_toggle = gr.Button("Admin Controls", visible=True)
1152
+
1153
+ set_count_btn.click(
1154
+ set_language_count,
1155
+ inputs=[admin_language, admin_count],
1156
+ outputs=leaderboard_html
1157
+ )
1158
 
1159
# An event listener needs a callable. Writing `gr.update(visible=True)` here
# would evaluate it immediately and register its return value instead of a
# function, so it is wrapped in a lambda that runs on each click.
admin_toggle.click(
    lambda: gr.update(visible=True),
    outputs=admin_panel,
)
 
 
 
 
 
 
 
 
 
 
 
 
1163
 
1164
 
1165
  # Create a hidden group instead of a modal
 
1292
 
1293
  # Update the consent button click handler
1294
  consent_btn.click(
1295
+ fn=show_main_app,
1296
+ inputs=[],
1297
+ outputs=[landing_page, main_app, user_consented]
 
 
 
 
1298
  )
1299
 
1300
  ##############################
 
1360
  outputs=[conversation_id],
1361
  )
1362
 
1363
def on_app_load():
    """Reload the language list and build the values used when the app loads.

    Rebinds the module-level ``LANGUAGES`` to the result of
    ``load_languages()``.

    Returns:
        tuple: a new UUID4 string, a ``gr.Dropdown`` offering every language
        with the default preselected, and the default language name (the first
        language, or ``"English"`` when none are available).
    """
    global LANGUAGES
    LANGUAGES = load_languages()

    choices = list(LANGUAGES.keys())
    if choices:
        initial = choices[0]
    else:
        initial = "English"

    new_session_id = str(uuid.uuid4())
    dropdown = gr.Dropdown(choices=choices, value=initial)
    return new_session_id, dropdown, initial
1370
+
 
 
 
 
1371
  def toggle_admin_panel(visible):
1372
  return gr.Accordion(visible=not visible)
1373
+
1374
  def handle_set_count(language, count):
1375
  updated_data = set_language_data_points(language, int(count))
1376
  save_language_data_points()
1377
  return update_leaderboard_html(updated_data), updated_data
1378
+
1379
  demo.load(
1380
+ fn=lambda: (on_app_load(), load_initial_language_data()),
1381
  inputs=None,
1382
  outputs=[
1383
+ session_id,
1384
  language,
1385
  leaderboard_data
1386
  ]
 
1485
  inputs=[admin_panel],
1486
  outputs=[admin_panel]
1487
  )
1488
+
1489
  set_count_btn.click(
1490
  fn=handle_set_count,
1491
  inputs=[admin_language, admin_count],
app/leadboard_config.py CHANGED
@@ -56,5 +56,5 @@ def set_initial_counts():
56
  print("Please provide both --language and --count arguments")
57
  parser.print_help()
58
 
59
- if __name__ == "__main__":
60
- set_initial_counts()
 
56
  print("Please provide both --language and --count arguments")
57
  parser.print_help()
58
 
59
+ # if __name__ == "__main__":
60
+ # set_initial_counts()