Jean-Baptiste Pin committed on
Commit
f0f01a3
·
1 Parent(s): 7ce8f44
Files changed (3) hide show
  1. .gitignore +3 -0
  2. app_agent.py +268 -0
  3. app_lg.py +299 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ tmp
2
+ .ropeproject
3
+ tool
app_agent.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import inspect
5
+ from pathlib import Path
6
+ import yaml
7
+ import pandas as pd
8
+ from textwrap import dedent
9
+ from agno.agent import Agent, RunResponse # noqa
10
+ from agno.models.lmstudio import LMStudio
11
+ from agno.media import Audio
12
+ from agno.tools.duckduckgo import DuckDuckGoTools
13
+ from agno.tools.serpapi import SerpApiTools
14
+ from agno.memory.v2.db.sqlite import SqliteMemoryDb
15
+ from agno.memory.v2.memory import Memory
16
+ from agno.storage.sqlite import SqliteStorage
17
+ from agno.tools.wikipedia import WikipediaTools
18
+ from agno.tools.website import WebsiteTools
19
+ from agno.tools.calculator import CalculatorTools
20
+ from agno.tools.pandas import PandasTools
21
+ from agno.tools.python import PythonTools
22
+ from agno.media import Image
23
+ from agno.tools.reasoning import ReasoningTools
24
+ from agno.tools.file import FileTools
25
+ from agno.tools.csv_toolkit import CsvTools
26
+ from pydantic import BaseModel, Field
27
+ from agno.models.google import Gemini
28
# --- Constants ---
# Base URL of the scoring service that serves the questions and accepts answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Credentials belong in the environment, not in source control. Only fall back
# to the placeholder when SERPAPI_API_KEY has not been provided, so that a real
# key exported by the deployment is not overwritten at import time.
os.environ.setdefault("SERPAPI_API_KEY", "...")

# NOTE(review): a string that looked like a live API key had been committed
# here as a comment. It has been removed; revoke and rotate that key.

# --- Basic Agent Definition ---
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
37
+
38
class BasicAgent:
    """Callable wrapper around an agno ``Agent`` served by a local LM Studio model."""

    def __init__(self):
        """Create the memory, session storage, model and tools, then build the agent."""
        # Memory lives in a SQLite file and is cleared before the agent is built.
        memory_store = SqliteMemoryDb(table_name="memory", db_file="tmp/memory.db")
        self.memory = Memory(db=memory_store)
        self.memory.clear()

        # Agent sessions are kept in their own SQLite file and table.
        session_store = SqliteStorage(
            table_name="agent_sessions",
            db_file="tmp/data.db",
        )

        # Other models tried during development:
        #   LMStudio(id="deepcogito-cogito-v1-preview-qwen-32b@8bit")
        #   Gemini(id="gemini-2.0-flash", temperature=0.1, grounding=True, search=True)
        llm = LMStudio(id="meta-llama-3.1-8b-instruct")

        toolset = [
            PandasTools(),
            SerpApiTools(),
            WebsiteTools(),
            CalculatorTools(add=True, subtract=True, multiply=True, divide=True),
            PythonTools(base_dir=Path("tmp/python")),
            FileTools(Path("tmp/file")),
            WikipediaTools(),
        ]

        # The answer-format rules are given to the agent as its expected output.
        answer_format = (
            "Your final answer should be a number OR as few words as possible OR "
            "a comma separated list of numbers and/or strings. "
            "If you are asked for a number, don't use comma to write your number "
            "neither use units such as $ or percent sign unless specified otherwise. "
            "If you are asked for a string, don't use articles, neither abbreviations "
            "(e.g. for cities), and write the digits in plain text unless specified otherwise. "
            "If you are asked for a comma separated list, apply the above rules "
            "depending of whether the element to be put in the list is a number or a string."
        )

        self.agent = Agent(
            model=llm,
            tools=toolset,
            show_tool_calls=True,
            tool_call_limit=8,
            search_knowledge=True,
            update_knowledge=True,
            read_tool_call_history=True,
            add_history_to_messages=True,
            num_history_responses=3,
            memory=self.memory,
            storage=session_store,
            debug_mode=True,
            instructions=["Reply with only the final_answer without '.' and nothing else."],
            expected_output=answer_format,
        )
        print("BasicAgent initialized.")

    def __call__(self, question: str, file: str, taskId: str):
        """
        Run the agent on one question and return the content of its response.

        Args:
            question: The question text.
            file: Name of the file attached to the task; falsy when there is none.
            taskId: Task identifier, used to build the attachment download URL.
        """
        print(f"Agent received question (first 100 chars): {question[:100]}...")
        if file:
            # Tell the model where the attachment can be fetched from.
            question += f" You can donwload the file associated at {DEFAULT_API_URL}/files/{taskId}"
        run_response = self.agent.run(question)
        print(run_response)
        return run_response.content
89
+
90
+
91
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the BasicAgent on them, submit all answers,
    and display the results.

    Args:
        profile: The Hugging Face OAuth profile supplied by Gradio, or None
            when the user is not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with one row per attempted question, or None when the run
        stops before the agent is executed.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Used to build the link to the code

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent (modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # When running as a Hugging Face Space, this link points to the codebase.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # requests' JSONDecodeError derives from RequestException, so it must be
    # handled first or this branch can never run.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        question_file = item.get("file_name")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text, question_file, task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            print(f"Question: {item}, Task ID: {task_id}, Submitted Answer: {submitted_answer}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # A failing question is logged but not submitted; the run continues.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)

    # Every outcome of the submission step reports the same per-question log.
    return status_message, pd.DataFrame(results_log)
213
+
214
+
215
+ # --- Build Gradio Interface using Blocks ---
216
# Gradio UI: a login button, a run button, and two outputs (status text and a
# table of questions and answers) fed by run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once you click the submit button, it can take quite some time (this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions in async.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs: the handler's only parameter is the OAuth profile.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
245
+
246
if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # Hugging Face's documented example value for SPACE_HOST already ends
        # in ".hf.space"; strip it first so the suffix is never doubled.
        runtime_host = space_host_startup.removesuffix(".hf.space")
        print(f" Runtime URL should be: https://{runtime_host}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule, same width as the opening banner.
    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
app_lg.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import inspect
5
+ import yaml
6
+ import pandas as pd
7
+ from typing import Annotated, Optional
8
+ from typing_extensions import TypedDict
9
+ from langgraph.graph import StateGraph, START, END
10
+ from langgraph.graph.message import add_messages
11
+ from langchain_openai import ChatOpenAI
12
+ from langgraph.prebuilt import create_react_agent
13
+ from langchain_community.tools import DuckDuckGoSearchRun,DuckDuckGoSearchResults
14
+ from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
15
+ from langchain_community.agent_toolkits.openapi.toolkit import RequestsToolkit
16
+ from langchain_community.utilities.requests import TextRequestsWrapper
17
+ from langchain.agents import AgentExecutor, load_tools
18
+ from langchain_community.utilities import GoogleSerperAPIWrapper
19
+ from langchain_community.tools.riza.command import ExecPython
20
+
21
# Credentials belong in the environment, not in source control. Only fall back
# to the placeholders when the variables are missing, so that real keys
# exported by the deployment are not overwritten at import time.
os.environ.setdefault("SERPER_API_KEY", "...")
os.environ.setdefault("RIZA_API_KEY", "...")

# --- Constants ---
# Base URL of the scoring service that serves the questions and accepts answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Vision model reached through an OpenAI-compatible endpoint. The endpoint can
# be overridden with VISION_LLM_BASE_URL; the default is the original address.
# The API key falls back to "not-needed", the value this file's commented-out
# model configuration uses for the local server.
vision_llm = ChatOpenAI(
    model="qwen2.5-vl-7b-instruct",
    base_url=os.getenv("VISION_LLM_BASE_URL", "http://172.16.216.190:1234/v1"),
    api_key=os.getenv("OPENAI_API_KEY", "not-needed"),
)
30
+
31
def extract_text(img_path: str) -> str:
    """
    Extract text from an image file using a multimodal model.

    Args:
        img_path: Path of the image file to read from local disk.

    Returns:
        The text returned by the vision model with surrounding whitespace
        removed, or an empty string if the file cannot be read or the model
        call fails.
    """
    # Imported locally: the file's import block does not bring these in, and
    # without base64 every call ended in the except branch and returned "".
    import base64
    import mimetypes

    try:
        # Read image and encode as base64
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Label the data URL with the file's real image type; keep the
        # previous PNG label when the type cannot be determined.
        guessed_type = mimetypes.guess_type(img_path)[0]
        if guessed_type and guessed_type.startswith("image/"):
            mime_type = guessed_type
        else:
            mime_type = "image/png"

        # Prepare the prompt including the base64 image data
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the vision-capable model
        response = vision_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Best effort by design: report the problem and return no text.
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return ""
79
+
80
+ # --- Basic Agent Definition ---
81
class State(TypedDict):
    """Typed dictionary describing the graph state: chat messages and an optional input file."""

    # Annotated with `add_messages`, which defines how this key is updated:
    # new messages are appended to the list instead of overwriting it.
    messages: Annotated[list, add_messages]
    # Optional string naming an input file; may be None.
    input_file: Optional[str]
87
+
88
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
89
class BasicAgent:
    """Callable wrapper around an agent built with ``create_react_agent``."""

    def __init__(self):
        """Collect the tools and build the agent."""
        # A local OpenAI-compatible model was tried during development:
        #   ChatOpenAI(model="meta-llama-3.1-8b-instruct",
        #              base_url="http://192.168.1.82:1234/v1",
        #              temperature=0, api_key="not-needed")
        http_toolkit = RequestsToolkit(
            requests_wrapper=TextRequestsWrapper(headers={}),
            allow_dangerous_requests=True,
        )
        base_tools = [extract_text, ExecPython()]
        http_tools = http_toolkit.get_tools()
        search_tools = load_tools(["google-serper"])
        all_tools = base_tools + http_tools + search_tools

        self.agent = create_react_agent(
            model="gemini-2.0-flash",
            tools=all_tools,
        )

        print("BasicAgent initialized.")

    def __call__(self, question: str, file: str, taskId: str):
        """
        Run the agent on one question and return the content of its last message.

        Args:
            question: The question text.
            file: Name of the file attached to the task; falsy when there is none.
            taskId: Task identifier, used to build the attachment download URL.
        """
        print(f"Agent received question (first 100 chars): {question[:100]}...")

        if file:
            # Tell the model where the attachment can be fetched from.
            question += f" You can donwload the file associated at {DEFAULT_API_URL}/files/{taskId}"

        final_state = self.agent.invoke({"messages": [HumanMessage(content=question)]})
        last_message = final_state['messages'][-1]
        return last_message.content
118
+
119
+
120
+
121
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the BasicAgent on them, submit all answers,
    and display the results.

    Args:
        profile: The Hugging Face OAuth profile supplied by Gradio, or None
            when the user is not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with one row per attempted question, or None when the run
        stops before the agent is executed.
    """
    # NOTE(review): this overwrites any HF_TOKEN already present in the
    # environment with a hard-coded value; prefer supplying the token through
    # the environment or a Space secret.
    os.environ["HF_TOKEN"] = "..."
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Used to build the link to the code

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent (modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # When running as a Hugging Face Space, this link points to the codebase.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # requests' JSONDecodeError derives from RequestException, so it must be
    # handled first or this branch can never run.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        question_file = item.get("file_name")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text, question_file, task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            print(f"Question: {item}, Task ID: {task_id}, Submitted Answer: {submitted_answer}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # A failing question is logged but not submitted; the run continues.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)

    # Every outcome of the submission step reports the same per-question log.
    return status_message, pd.DataFrame(results_log)
244
+
245
+
246
+ # --- Build Gradio Interface using Blocks ---
247
# Gradio UI: a login button, a run button, and two outputs (status text and a
# table of questions and answers) fed by run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once you click the submit button, it can take quite some time (this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions in async.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs: the handler's only parameter is the OAuth profile.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
276
+
277
if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # Hugging Face's documented example value for SPACE_HOST already ends
        # in ".hf.space"; strip it first so the suffix is never doubled.
        runtime_host = space_host_startup.removesuffix(".hf.space")
        print(f" Runtime URL should be: https://{runtime_host}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule, same width as the opening banner.
    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)