emre committed on
Commit
7ce3c76
·
1 Parent(s): 46ada82

Add application file

Browse files
Files changed (1) hide show
  1. app.py +193 -7
app.py CHANGED
@@ -1,11 +1,197 @@
 
# Previous version of app.py (removed by this commit): a minimal Gradio
# front-end over a smolagents CodeAgent backed by a local Ollama model.
import gradio as gr
from smolagents import CodeAgent, LiteLLMModel


# Local Ollama endpoint; assumes an Ollama server on the default port —
# TODO confirm the model tag "gemma3:12b" is pulled locally.
model = LiteLLMModel(model_id="ollama/gemma3:12b", api_base="http://localhost:11434")
# No custom tools; add_base_tools=True enables smolagents' built-in tool set.
agent = CodeAgent(tools=[], model=model, add_base_tools=True)


def run_agent(prompt):
    # Forward the user's prompt to the agent and return its raw answer.
    return agent.run(prompt)


# Single text-in / text-out Gradio interface around the agent.
iface = gr.Interface(fn=run_agent, inputs="text", outputs="text", title="Code Agent")
iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

import gradio as gr
import pandas as pd
import requests
from huggingface_hub import login
from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel

# Constants
# Scoring service for the Agents Course unit-4 GAIA evaluation.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Hugging Face token: set it here or provide it via the HF_TOKEN environment variable.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Log in to the Hugging Face Hub only when a token was actually provided,
# so the app still starts (unauthenticated) without one.
if HF_TOKEN:
    login(token=HF_TOKEN)
class GAIACodeAgent:
    """A thin wrapper around a smolagents CodeAgent with web search.

    Instances are callable: pass a question string, get an answer string.
    Failures never raise — they come back as an "Error: ..." string so the
    evaluation loop can keep going.
    """

    def __init__(self):
        """Build the underlying CodeAgent with a DuckDuckGo search tool."""
        self.agent = CodeAgent(
            tools=[DuckDuckGoSearchTool()],
            model=InferenceClientModel(),
        )

    def __call__(self, question: str) -> str:
        """Answer a single question, returning a stripped answer string.

        Args:
            question: The raw question text to answer.

        Returns:
            The agent's answer with surrounding whitespace removed, or an
            "Error: ..." message if anything went wrong.
        """
        try:
            print(f"Agent received question: {question[:50]}...")
            # Frame the question so the model favors short, literal answers —
            # the benchmark grades on exact match.
            enriched_prompt = (
                "Answer the following question accurately and concisely. "
                "Provide a straightforward answer without unnecessary elaboration. "
                "The answer will be evaluated for exact match accuracy.\n\n"
                f"Question: {question}\n\n"
                "Answer: "
            )
            # Strip whitespace to improve exact-match chances.
            answer = self.agent.run(enriched_prompt).strip()
            print(f"Agent returning answer: {answer[:50]}...")
            return answer
        except Exception as e:
            failure = f"Error: {str(e)}"
            print(failure)
            return failure
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GAIACodeAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: The Gradio OAuth profile of the logged-in user, or None when
            nobody is logged in (Gradio injects this automatically).

    Returns:
        A ``(status_message, results)`` pair where ``status_message`` is a
        human-readable string and ``results`` is a pandas DataFrame of the
        per-question answers (or None when the run aborts before any answers
        exist).
    """
    # Determine HF Space Runtime URL and Repo URL
    # SPACE_ID is set by the Hugging Face Spaces runtime; it may be unset
    # when running locally, in which case agent_code becomes a dead link.
    space_id = os.getenv("SPACE_ID")

    # Require a logged-in user: the username is part of the submission payload.
    if profile:
        username = profile.username
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = GAIACodeAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # Link to this Space's source tree — the scoring server records it
    # alongside the submission.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code URL: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        # e.g. a non-JSON response body from response.json()
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run Agent
    # results_log feeds the on-screen table; answers_payload is what gets
    # submitted. A per-question failure is logged but not submitted.
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    # Longer timeout than the fetch: the server grades every answer before replying.
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        # HTTP error: try to surface the server's structured "detail" field,
        # falling back to (truncated) raw response text.
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
def query_single_agent(question):
    """Run a freshly constructed agent on one question (used by the Test tab).

    Returns the agent's answer, or an "Error: ..." string if construction
    or execution fails.
    """
    try:
        # A new agent per request keeps the test tab stateless.
        return GAIACodeAgent()(question)
    except Exception as e:
        return f"Error: {str(e)}"
165
+ # Build Gradio Interface
166
+ with gr.Blocks(title="GAIA Code Agent Evaluation") as demo:
167
+ gr.Markdown("# GAIA Code Agent Evaluation")
168
+ gr.Markdown(
169
+ """
170
+ This application helps you evaluate a code agent on the GAIA benchmark.
171
+
172
+ ## Instructions:
173
+ 1. Log in to your Hugging Face account using the button below
174
+ 2. You can test the agent with a single question in the "Test Agent" tab
175
+ 3. Use the "Run Evaluation" tab to run the agent on all GAIA questions and submit answers
176
+ """
177
+ )
178
+
179
+ with gr.Tab("Test Agent"):
180
+ question_input = gr.Textbox(
181
+ label="Enter a question",
182
+ placeholder="How many seconds would it take for a leopard at full speed to run through Pont des Arts?"
183
+ )
184
+ query_button = gr.Button("Get Answer")
185
+ response_output = gr.Textbox(label="Agent Response", lines=10)
186
+ query_button.click(query_single_agent, inputs=question_input, outputs=response_output)
187
+
188
+ with gr.Tab("Run Evaluation"):
189
+ gr.LoginButton()
190
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
191
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
192
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
193
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
194
+
195
+ # Start the app
196
+ if __name__ == "__main__":
197
+ demo.launch()