Toumaima commited on
Commit
72f623a
·
verified ·
1 Parent(s): 513d2f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -0
app.py CHANGED
@@ -1,3 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  def run_and_submit_all(profile: gr.OAuthProfile | None):
2
  space_id = os.getenv("SPACE_ID")
3
  if profile:
@@ -102,3 +269,20 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
102
 
103
  except Exception as e:
104
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import string
5
+ import warnings
6
+ import pandas as pd
7
+ from huggingface_hub import login
8
+ import re
9
+ import json
10
+ from groq import Groq
11
+
12
+ # --- Constants ---
13
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
+
15
+ # --- Basic Agent Definition ---
16
+ class BasicAgent:
17
+ def __init__(self):
18
+ print("BasicAgent initialized.")
19
+ self.client = Groq(api_key=os.environ["GROQ_API_KEY"])
20
+ self.agent_prompt = (
21
+ """You are a general AI assistant. I will ask you a question. Report your thoughts, and
22
+ finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
23
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated
24
+ list of numbers and/or strings.
25
+ If you are asked for a number, don't use comma to write your number neither use units such as $
26
+ or percent sign unless specified otherwise.
27
+ If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the
28
+ digits in plain text unless specified otherwise.
29
+ If you are asked for a comma separated list, apply the above rules depending of whether the element
30
+ to be put in the list is a number or a string."""
31
+ )
32
+
33
+ def format_final_answer(self, answer: str) -> str:
34
+ cleaned = " ".join(answer.split())
35
+ return f"FINAL ANSWER: {cleaned}"
36
+
37
+ def check_commutativity(self):
38
+ S = ['a', 'b', 'c', 'd', 'e']
39
+ counter_example_elements = set()
40
+ index = {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4}
41
+ self.operation_table = [
42
+ ['a', 'b', 'c', 'b', 'd'],
43
+ ['b', 'c', 'a', 'e', 'c'],
44
+ ['c', 'a', 'b', 'b', 'a'],
45
+ ['b', 'e', 'b', 'e', 'd'],
46
+ ['d', 'b', 'a', 'd', 'c']
47
+ ]
48
+ for x in S:
49
+ for y in S:
50
+ x_idx = index[x]
51
+ y_idx = index[y]
52
+ if self.operation_table[x_idx][y_idx] != self.operation_table[y_idx][x_idx]:
53
+ counter_example_elements.add(x)
54
+ counter_example_elements.add(y)
55
+ return self.format_final_answer(", ".join(sorted(counter_example_elements)))
56
+
57
+ def maybe_reversed(self, text: str) -> bool:
58
+ words = text.split()
59
+ reversed_ratio = sum(
60
+ 1 for word in words if word[::-1].lower() in {
61
+ "if", "you", "understand", "this", "sentence", "write",
62
+ "opposite", "of", "the", "word", "left", "answer"
63
+ }
64
+ ) / len(words)
65
+ return reversed_ratio > 0.3
66
+
67
+ def solve_riddle(self, question: str) -> str:
68
+ question = question[::-1]
69
+ if "opposite of the word" in question:
70
+ match = re.search(r"opposite of the word ['\"](\w+)['\"]", question)
71
+ if match:
72
+ word = match.group(1).lower()
73
+ opposites = {
74
+ "left": "right", "up": "down", "hot": "cold",
75
+ "true": "false", "yes": "no", "black": "white"
76
+ }
77
+ opposite = opposites.get(word, f"UNKNOWN_OPPOSITE_OF_{word}")
78
+ return "FINAL ANSWER: RIGHT"
79
+ return self.format_final_answer("COULD_NOT_SOLVE")
80
+
81
+ def query_groq(self, question: str) -> str:
82
+ full_prompt = f"{self.agent_prompt}\n\nQuestion: {question}"
83
+ try:
84
+ response = self.client.chat.completions.create(
85
+ model="llama3-8b-8192",
86
+ messages=[{"role": "user", "content": full_prompt}]
87
+ )
88
+ answer = response.choices[0].message.content
89
+ if "FINAL ANSWER: " in answer:
90
+ return answer.split("FINAL ANSWER: ")[-1].strip().upper()
91
+ else:
92
+ return self.format_final_answer(answer).upper()
93
+ except Exception as e:
94
+ print(f"[Groq ERROR]: {e}")
95
+ return self.format_final_answer("GROQ_ERROR")
96
+
97
+ def __call__(self, question: str) -> str:
98
+ print(f"Received question: {question[:50]}...")
99
+ if "commutative" in question.lower():
100
+ return self.check_commutativity()
101
+ if self.maybe_reversed(question):
102
+ print("Detected likely reversed riddle.")
103
+ return self.solve_riddle(question)
104
+ return self.query_groq(question)
105
+
106
+ # --- Answer Scoring ---
107
+ def question_scorer(model_answer: str, ground_truth: str) -> bool:
108
+ def normalize_str(input_str, remove_punct=True) -> str:
109
+ no_spaces = re.sub(r"\s", "", input_str)
110
+ if remove_punct:
111
+ translator = str.maketrans("", "", string.punctuation)
112
+ return no_spaces.lower().translate(translator)
113
+ else:
114
+ return no_spaces.lower()
115
+
116
+ def normalize_number_str(number_str: str) -> float | None:
117
+ for char in ["$", "%", ","]:
118
+ number_str = number_str.replace(char, "")
119
+ try:
120
+ return float(number_str)
121
+ except ValueError:
122
+ print(f"String '{number_str}' cannot be normalized to number.")
123
+ return None
124
+
125
+ def split_string(s: str, char_list: list[str] = [",", ";"]) -> list[str]:
126
+ pattern = f"[{''.join(map(re.escape, char_list))}]"
127
+ return [elem.strip() for elem in re.split(pattern, s)]
128
+
129
+ def is_float(val) -> bool:
130
+ try:
131
+ float(val)
132
+ return True
133
+ except ValueError:
134
+ return False
135
+
136
+ if model_answer is None:
137
+ model_answer = "None"
138
+
139
+ if is_float(ground_truth):
140
+ print(f"Evaluating '{model_answer}' as a number.")
141
+ normalized = normalize_number_str(model_answer)
142
+ return normalized == float(ground_truth) if normalized is not None else False
143
+
144
+ elif any(char in ground_truth for char in [",", ";"]):
145
+ print(f"Evaluating '{model_answer}' as a comma/semicolon-separated list.")
146
+ gt_elems = split_string(ground_truth)
147
+ ma_elems = split_string(model_answer)
148
+
149
+ if len(gt_elems) != len(ma_elems):
150
+ warnings.warn("Answer lists have different lengths, returning False.", UserWarning)
151
+ return False
152
+
153
+ for ma_elem, gt_elem in zip(ma_elems, gt_elems):
154
+ if is_float(gt_elem):
155
+ normalized = normalize_number_str(ma_elem)
156
+ if normalized != float(gt_elem):
157
+ return False
158
+ else:
159
+ if normalize_str(ma_elem, remove_punct=False) != normalize_str(gt_elem, remove_punct=False):
160
+ return False
161
+ return True
162
+
163
+ else:
164
+ print(f"Evaluating '{model_answer}' as a string.")
165
+ return normalize_str(model_answer) == normalize_str(ground_truth)
166
+
167
+ # --- Run and Submit All ---
168
  def run_and_submit_all(profile: gr.OAuthProfile | None):
169
  space_id = os.getenv("SPACE_ID")
170
  if profile:
 
269
 
270
  except Exception as e:
271
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
272
+
273
+ # --- Build Gradio Interface ---
274
+ with gr.Blocks() as demo:
275
+ gr.Markdown("# Basic Agent Evaluation Runner")
276
+ gr.LoginButton()
277
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
278
+ status_output = gr.Textbox(label="Run Status / Submission Result", max_lines=5, interactive=False, max_length=200)
279
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
280
+
281
+ run_button.click(
282
+ fn=run_and_submit_all,
283
+ outputs=[status_output, results_table]
284
+ )
285
+
286
+ if __name__ == "__main__":
287
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
288
+ demo.launch(debug=True, share=False)