Toumaima committed on
Commit
16600af
·
verified ·
1 Parent(s): 273ef8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +233 -57
app.py CHANGED
@@ -4,91 +4,267 @@ import requests
4
  import pandas as pd
5
  from transformers import AutoModelForCausalLM, AutoTokenizer
6
 
 
 
 
 
 
 
 
 
 
 
7
  # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
- # --- BasicAgent Class ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
class BasicAgent:
    """Minimal agent that answers a question with a local GPT-2 model.

    The reply is always returned as ``FINAL ANSWER: <text>``; when the model
    produces no usable text the answer is ``FINAL ANSWER: UNKNOWN``.
    """

    def __init__(self):
        print("BasicAgent initialized.")
        self.llm = AutoModelForCausalLM.from_pretrained("gpt2")
        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.agent_prompt = (
            "You are a general AI assistant. I will ask you a question. "
            "Finish your answer with the format: FINAL ANSWER: [YOUR FINAL ANSWER]."
        )

    def __call__(self, question: str) -> str:
        """Generate an answer for *question*.

        Args:
            question: The question text to send to the model.

        Returns:
            A string of the form ``FINAL ANSWER: <text>``.
        """
        prompt = f"{self.agent_prompt}\n\nQuestion: {question}"
        inputs = self.tokenizer(prompt, return_tensors="pt")
        outputs = self.llm.generate(**inputs, max_new_tokens=50)

        # The generated sequence starts with the prompt tokens. The prompt
        # itself contains "FINAL ANSWER: [YOUR FINAL ANSWER].", so decoding the
        # whole sequence would make the template look like the model's answer.
        # Decode only the tokens that follow the prompt.
        prompt_len = len(inputs["input_ids"][0])
        completion = self.tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        )

        final = completion.split("FINAL ANSWER:")[-1].strip()
        return f"FINAL ANSWER: {final}" if final else "FINAL ANSWER: UNKNOWN"
28
-
29
- # --- Run and Submit Function ---
30
def run_and_submit_all(profile):
    """Fetch all questions, answer them with ``BasicAgent`` and submit the answers.

    Args:
        profile: Object exposing a ``username`` attribute; a missing profile
            or empty username is treated as "not logged in".

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` log of the answered questions, or ``None`` when
        the run stopped before any question was processed.
    """
    if not profile or not getattr(profile, "username", None):
        return "❌ Please log in to Hugging Face first.", None

    space_id = os.getenv("SPACE_ID", "your-username/your-space")  # fallback
    username = profile.username
    agent = BasicAgent()

    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        # Without this an HTTP error page would be parsed as if it were the
        # question list.
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    if not isinstance(questions, list):
        return "Error fetching questions: unexpected response format", None

    answers = []
    log = []

    for q in questions:
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        question = q.get("question")
        if not task_id or not question:
            continue
        try:
            answer = agent(question)
            answers.append({"task_id": task_id, "submitted_answer": answer})
            log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
        except Exception as e:
            # A failing question is logged but does not abort the whole run.
            log.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"ERROR: {e}"})

    if not answers:
        return "⚠️ No answers were generated.", pd.DataFrame(log)

    submission = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": answers,
    }

    try:
        r = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=30)
        r.raise_for_status()
        res = r.json()
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(log)

    status = (
        f"Submission Successful!\n"
        f"User: {res.get('username')}\n"
        f"Score: {res.get('score', 'N/A')}% "
        f"({res.get('correct_count', '?')}/{res.get('total_attempted', '?')})\n"
        f"Message: {res.get('message', '')}"
    )
    return status, pd.DataFrame(log)
80
 
81
  # --- Gradio UI ---
82
# Page layout: heading, login button, run button and the two output widgets.
with gr.Blocks() as demo:
    gr.Markdown(value="# Basic Agent Evaluation")
    gr.Markdown(
        value="Login with Hugging Face and click the button to run evaluation and submit your answers."
    )

    profile = gr.LoginButton()
    run_button = gr.Button(value="Run and Submit")
    status_output = gr.Textbox(
        label="Submission Status",
        lines=4,
    )
    results_table = gr.DataFrame(label="Answers Submitted")

    # Clicking the button runs the evaluation and fills both outputs.
    run_button.click(
        fn=run_and_submit_all,
        inputs=[profile],
        outputs=[status_output, results_table],
    )

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
 
 
4
  import pandas as pd
5
  from transformers import AutoModelForCausalLM, AutoTokenizer
6
 
7
+ # ---------- Imports for Advanced Agent ----------
8
+ import re
9
+ from langgraph.graph import StateGraph, MessagesState
10
+ from langgraph.prebuilt import tools_condition, ToolNode
11
+ from langchain_core.messages import SystemMessage, HumanMessage
12
+ from langchain_core.tools import tool
13
+ from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
14
+ from langchain_community.tools.tavily_search import TavilySearchResults
15
+ from groq import Groq
16
+
17
  # --- Constants ---
18
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
 
20
+ # ---------- Tools ----------
21
+ from langchain_core.tools import tool
22
+ from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
23
+ from langchain_community.tools.tavily_search import TavilySearchResults
24
+
25
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a given query and return content from up to 2 relevant pages."""
    # NOTE: the docstring above is consumed by the @tool decorator at runtime,
    # so it is kept verbatim.
    loader = WikipediaLoader(query=query, load_max_docs=2)
    pages = loader.load()
    # One blank line between the text of each page.
    return "\n\n".join(page.page_content for page in pages)
30
+
31
@tool
def web_search(query: str) -> str:
    """Search the web using the Tavily API and return content from up to 3 search results."""
    # NOTE: the docstring above is consumed by the @tool decorator at runtime,
    # so it is kept verbatim.
    results = TavilySearchResults(max_results=3).invoke(query)

    # The Tavily tool returns plain dicts (with a "content" entry) rather than
    # Document objects, and may return an error message string on failure.
    # Reading `.page_content` from those raised AttributeError.
    if isinstance(results, str):
        return results

    snippets = []
    for item in results or []:
        if isinstance(item, dict):
            text = item.get("content", "")
        else:
            # Tolerate Document-like objects in case the tool's output type changes.
            text = getattr(item, "page_content", "")
        if text:
            snippets.append(str(text))
    return "\n\n".join(snippets)
36
+
37
@tool
def arvix_search(query: str) -> str:
    """Search academic papers on Arxiv for a given query and return up to 3 result summaries."""
    # NOTE: the docstring above is consumed by the @tool decorator at runtime,
    # so it is kept verbatim.
    loader = ArxivLoader(query=query, load_max_docs=3)
    papers = loader.load()
    # Keep only the first 1000 characters of each paper to bound the output size.
    excerpts = (paper.page_content[:1000] for paper in papers)
    return "\n\n".join(excerpts)
42
+
43
+ # Tool-based LangGraph builder
44
def build_tool_graph(system_prompt):
    """Build and compile a LangGraph graph with a GPT-2 assistant node and a tool node.

    Args:
        system_prompt: Instruction text placed before the latest message when
            the prompt for the model is built. May be empty.

    Returns:
        The compiled graph; invoke it with a ``{"messages": [...]}`` state.
    """
    llm = AutoModelForCausalLM.from_pretrained("gpt2")  # Load Hugging Face GPT-2 model
    tokenizer = AutoTokenizer.from_pretrained("gpt2")

    max_new_tokens = 64
    # Leave room for the generated tokens inside the model's context window.
    max_prompt_tokens = max(1, min(tokenizer.model_max_length, 1024) - max_new_tokens)

    def assistant(state: MessagesState):
        """Answer the most recent message in the state with GPT-2."""
        last_message = state["messages"][-1]
        # Messages are normally message objects, but plain dicts are tolerated.
        if isinstance(last_message, dict):
            question = last_message.get("content", "")
        else:
            question = getattr(last_message, "content", "")

        if system_prompt:
            prompt = f"{system_prompt}\n\nQuestion: {question}"
        else:
            prompt = str(question)

        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=max_prompt_tokens,
        )
        outputs = llm.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Decode only the continuation so the prompt is not echoed back.
        prompt_len = inputs["input_ids"].shape[-1]
        result = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
        # An explicit role lets the message state convert the dict into a message.
        return {"messages": [{"role": "assistant", "content": result}]}

    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode([wiki_search, web_search, arvix_search]))
    builder.set_entry_point("assistant")
    builder.set_finish_point("assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")
    return builder.compile()
63
+
64
+
65
+ # --- Advanced BasicAgent Class ---
66
class BasicAgent:
    """Question-answering agent with a few hard-coded shortcuts.

    ``__call__`` dispatches a question, in this order, to:

    1. a fixed commutativity check (question mentions "commutative"),
    2. a reversed-sentence riddle solver,
    3. the LangGraph tool chain (question mentions "use tools"),
    4. the Groq chat-completions API (everything else).

    Every path returns a string of the form ``FINAL ANSWER: <text>``.
    """

    # Plain-text words that are looked for, reversed, in a question in order
    # to detect a sentence written backwards.
    _REVERSED_MARKERS = frozenset({
        "if", "you", "understand", "this", "sentence", "write",
        "opposite", "of", "the", "word", "left", "answer",
    })

    # Known antonyms for the reversed "opposite of the word" riddle.
    _OPPOSITES = {
        "left": "right", "up": "down", "hot": "cold",
        "true": "false", "yes": "no", "black": "white",
    }

    # Fixed operation table over the elements a..e used by check_commutativity.
    _ELEMENTS = ("a", "b", "c", "d", "e")
    _OPERATION_TABLE = (
        ("a", "b", "c", "b", "d"),
        ("b", "c", "a", "e", "c"),
        ("c", "a", "b", "b", "a"),
        ("b", "e", "b", "e", "d"),
        ("d", "b", "a", "d", "c"),
    )

    def __init__(self):
        print("BasicAgent initialized.")
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY", ""))
        # Built from single-line pieces so that no source indentation or
        # line breaks leak into the prompt text.
        self.agent_prompt = (
            "You are a general AI assistant. I will ask you a question. Report your thoughts, and "
            "finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. "
            "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated "
            "list of numbers and/or strings. "
            "If you are asked for a number, don't use comma to write your number neither use units such as $ "
            "or percent sign unless specified otherwise. "
            "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the "
            "digits in plain text unless specified otherwise. "
            "If you are asked for a comma separated list, apply the above rules depending of whether the element "
            "to be put in the list is a number or a string."
        )
        self.tool_chain = build_tool_graph(self.agent_prompt)

    def format_final_answer(self, answer: str) -> str:
        """Normalize *answer* to ``FINAL ANSWER: <text>``.

        Whitespace runs are collapsed to single spaces. If the text contains
        the marker ``FINAL ANSWER:`` (any letter case), only what follows the
        last occurrence is kept, so a repeated template does not end up in the
        answer. Without a marker the whole cleaned text is used.
        """
        cleaned = " ".join(answer.split())
        tail = re.split(r"FINAL ANSWER:", cleaned, flags=re.IGNORECASE)[-1]
        return f"FINAL ANSWER: {tail.strip()}"

    def query_groq(self, question: str) -> str:
        """Ask the Groq chat model and return the formatted, upper-cased answer.

        Any exception is printed and reported as ``FINAL ANSWER: GROQ_ERROR``.
        """
        full_prompt = f"{self.agent_prompt}\n\nQuestion: {question}"
        try:
            response = self.client.chat.completions.create(
                model="llama3-8b-8192",
                messages=[{"role": "user", "content": full_prompt}],
            )
            answer = response.choices[0].message.content
            print(f"[Groq Raw Response]: {answer}")
            return self.format_final_answer(answer).upper()
        except Exception as e:
            print(f"[Groq ERROR]: {e}")
            return self.format_final_answer("GROQ_ERROR")

    def query_tools(self, question: str) -> str:
        """Run the question through the tool graph and format the last message.

        Any exception is printed and reported as ``FINAL ANSWER: TOOL_ERROR``.
        """
        try:
            input_state = {
                "messages": [
                    SystemMessage(content=self.agent_prompt),
                    HumanMessage(content=question),
                ]
            }
            result = self.tool_chain.invoke(input_state)
            final_msg = result["messages"][-1].content
            print(f"[LangGraph Final Response]: {final_msg}")
            return self.format_final_answer(final_msg)
        except Exception as e:
            print(f"[LangGraph ERROR]: {e}")
            return self.format_final_answer("TOOL_ERROR")

    def __call__(self, question: str) -> str:
        """Answer *question* using the first matching strategy."""
        print(f"Received question: {question[:50]}...")
        lowered = question.lower()
        if "commutative" in lowered:
            return self.check_commutativity()
        if self.maybe_reversed(question):
            print("Detected likely reversed riddle.")
            return self.solve_riddle(question)
        if "use tools" in lowered:
            return self.query_tools(question)
        return self.query_groq(question)

    def check_commutativity(self):
        """Return the elements involved in any non-commutative pair of the fixed table.

        The table is hard-coded; the question text is not consulted. The
        result is a sorted, comma-separated list in ``FINAL ANSWER:`` format.
        """
        elements = self._ELEMENTS
        table = self._OPERATION_TABLE
        # Kept as an instance attribute for code that reads it after the call.
        self.operation_table = [list(row) for row in table]

        offenders = set()
        for i, x in enumerate(elements):
            # Only pairs above the diagonal are needed: the comparison is symmetric.
            for j in range(i + 1, len(elements)):
                if table[i][j] != table[j][i]:
                    offenders.update((x, elements[j]))
        return self.format_final_answer(", ".join(sorted(offenders)))

    def maybe_reversed(self, text: str) -> bool:
        """Return True when more than 30% of the words are known words spelled backwards.

        Empty or whitespace-only text has no words and is reported as not
        reversed (instead of dividing by zero).
        """
        words = text.split()
        if not words:
            return False
        hits = sum(1 for word in words if word[::-1].lower() in self._REVERSED_MARKERS)
        return hits / len(words) > 0.3

    def solve_riddle(self, question: str) -> str:
        """Reverse *question* and answer an "opposite of the word '<w>'" riddle.

        Returns the upper-cased antonym, ``UNKNOWN_OPPOSITE_OF_<W>`` when the
        word is not in the antonym table, or ``COULD_NOT_SOLVE`` when the
        reversed text does not match the expected pattern.
        """
        question = question[::-1]
        if "opposite of the word" in question:
            match = re.search(r"opposite of the word ['\"](\w+)['\"]", question)
            if match:
                word = match.group(1).lower()
                opposite = self._OPPOSITES.get(word, f"UNKNOWN_OPPOSITE_OF_{word}")
                return f"FINAL ANSWER: {opposite.upper()}"
        return self.format_final_answer("COULD_NOT_SOLVE")
173
+
174
+ # --- Evaluation Logic ---
175
def run_and_submit_all(profile, test_mode=False):
    """Fetch the questions, answer them with ``BasicAgent`` and submit the answers.

    Args:
        profile: The user name as a string, or an object exposing a
            ``username`` attribute. Empty or whitespace-only values are
            treated as "not logged in".
        test_mode: When true, the answers are generated and returned but
            nothing is posted to the scoring API.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` log of questions and answers, or ``None`` when
        the run stopped before any question was processed.
    """
    if isinstance(profile, str):
        username = profile.strip()
    else:
        username = (getattr(profile, "username", None) or "").strip()
    if not username:
        return "Please Login to Hugging Face with the button.", None
    print(f"User logged in: {username}")

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not questions_data or not isinstance(questions_data, list):
        return "Fetched questions list is empty or invalid format.", None

    results_log = []
    answers_payload = []

    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # A failing question is logged but does not abort the whole run.
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    if test_mode:
        # Honour the "Skip Submission" switch: report the answers without posting them.
        return (
            f"Test mode enabled: generated {len(answers_payload)} answer(s); submission skipped.",
            pd.DataFrame(results_log),
        )

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)

    final_status = (
        f"Submission Successful!\n"
        f"User: {result_data.get('username')}\n"
        f"Overall Score: {result_data.get('score', 'N/A')}% "
        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
        f"Message: {result_data.get('message', 'No message received.')}"
    )
    return final_status, pd.DataFrame(results_log)
240
 
241
  # --- Gradio UI ---
242
# Page layout: instructions, test-mode switch, run button, outputs and a
# read-only textbox that supplies the user name.
with gr.Blocks() as demo:
    gr.Markdown(value="# Basic Agent Evaluation Runner")
    gr.Markdown(
        value="""
        **Instructions:**
        1. Clone and customize your agent logic.
        2. Log in with Hugging Face.
        3. Click the button to run evaluation and submit your answers.
        """
    )

    test_checkbox = gr.Checkbox(
        label="Enable Test Mode (Skip Submission)",
        value=False,
    )
    run_button = gr.Button(value="Run Evaluation")
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # Stand-in for an OAuth profile: a fixed, non-editable user name.
    mock_oauth_profile = gr.Textbox(
        label="Simulated OAuth Profile",
        value="mock_user",
        interactive=False,
    )

    # Clicking the button runs the evaluation and fills both outputs.
    run_button.click(
        fn=run_and_submit_all,
        inputs=[mock_oauth_profile, test_checkbox],
        outputs=[status_output, results_table],
    )

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    print("Launching Gradio Interface...")
    demo.launch(debug=True, share=False)