Datawithsarah's picture
remove final answer
586cfcf
raw
history blame
4.38 kB
""" Basic Agent Evaluation Runner"""
import os
import inspect
import gradio as gr
import requests
import pandas as pd
import time
from langchain_core.messages import HumanMessage
from agent import build_graph
import re
# (Keep Constants as is)
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Basic Agent Definition ---
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
cached_answers = []
def parse_final_answer(raw: str) -> str:
    """Return the text that follows the last "final answer:" marker in *raw*.

    The marker is matched case-insensitively (``FINAL ANSWER:``,
    ``Final Answer:``, ``final answer:`` ...) and tolerates extra whitespace
    between the words and before the colon, since the exact casing of the
    prefix emitted by a model is not guaranteed.

    Args:
        raw: The raw text produced by the agent.

    Returns:
        The stripped text after the last marker, or the stripped input
        unchanged when no marker is present.
    """
    # re.split returns the whole string as a single element when the pattern
    # does not match, so taking [-1] covers both the "marker" and the
    # "no marker" case.
    pieces = re.split(r"final\s+answer\s*:", raw.strip(), flags=re.IGNORECASE)
    return pieces[-1].strip()
class BasicAgent:
    """Callable wrapper around the graph returned by ``build_graph``."""

    def __init__(self):
        # Build the graph once so every call reuses the same instance.
        self.graph = build_graph()

    def __call__(self, question: str) -> str:
        """Run the graph on *question* and return the parsed answer.

        The question is wrapped in a single ``HumanMessage``; the content of
        the last message in the graph output is passed through
        ``parse_final_answer``.
        """
        state = {"messages": [HumanMessage(content=question)]}
        result = self.graph.invoke(state)
        last_message = result['messages'][-1]
        return parse_final_answer(last_message.content)
def run_agent_only(profile: gr.OAuthProfile | None):
    """Run the agent on every evaluation question and cache the answers.

    The module-level ``cached_answers`` list is reset on every call and then
    filled with one ``{"task_id", "submitted_answer"}`` entry per question
    the agent answered without raising. Questions on which the agent raised
    appear in the returned log only, not in the cache.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with the columns "Task ID", "Question" and
        "Submitted Answer" on success, or ``None`` when the run stopped early
        (no login, agent init failure, fetch failure, unreadable prompt).
    """
    global cached_answers
    cached_answers = []
    results_log = []

    if not profile:
        return "Please login first.", None

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Agent Init Error: {e}", None

    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        # Without this an HTTP error body would be treated as the question list.
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    if not isinstance(questions, list):
        return "Error fetching questions: unexpected response format.", None

    # Report a missing/unreadable prompt file as a status message, like the
    # other failure modes, instead of letting the callback crash.
    try:
        with open("system_prompt.txt", "r", encoding="utf-8") as f:
            system_prompt = f.read().strip()
    except OSError as e:
        return f"Error reading system prompt: {e}", None

    for item in questions:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        file_name = item.get("file_name")
        if not task_id or not question:
            continue
        try:
            user_message = question + (f"\n\nFile to use: {file_name}" if file_name else "")
            answer = agent(system_prompt + "\n\n" + user_message)
            cached_answers.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
        except Exception as e:
            # Keep going: one failing question must not abort the whole run.
            results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"AGENT ERROR: {e}"})

    return "Agent finished. Click 'Submit Cached Answers' next.", pd.DataFrame(results_log)
def submit_cached_answers(profile: gr.OAuthProfile | None):
    """Submit the answers stored in ``cached_answers`` to the scoring API.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, None)`` tuple; the second element is always
        ``None``.
    """
    if not profile or not cached_answers:
        return "Nothing to submit. Run the agent first.", None

    payload = {
        "username": profile.username,
        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        "answers": cached_answers,
    }

    try:
        response = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=60)
        # Surface HTTP errors as failures; otherwise an error body that is
        # valid JSON would be reported as "Submission complete" with "?" fields.
        response.raise_for_status()
        result = response.json()
        score = result.get("score", "?")
        correct = result.get("correct_count", "?")
        total = result.get("total_attempted", "?")
        return f"Submission complete. Score: {score}% ({correct}/{total})", None
    except Exception as e:
        return f"Submission failed: {e}", None
# --- Gradio UI ---
# Two-step flow: "Run Agent" fills the answer cache and shows the log,
# "Submit Cached Answers" posts the cached answers for scoring.
with gr.Blocks() as demo:
    gr.Markdown("""# Agent Evaluator
1. Login with Hugging Face
2. Run agent only
3. Submit answers""")
    gr.LoginButton()
    run_button = gr.Button("Run Agent")
    submit_button = gr.Button("Submit Cached Answers")
    status_box = gr.Textbox(label="Status", lines=4)
    table = gr.DataFrame(label="Answers Log")
    # Both callbacks return (status, table); neither takes UI inputs.
    run_button.click(fn=run_agent_only, outputs=[status_box, table])
    # The original line had an unbalanced extra ")" here (a syntax error).
    submit_button.click(fn=submit_cached_answers, outputs=[status_box, table])
# Script entry point: print the Space-related environment variables that are
# set, then start the Gradio app.
if __name__ == "__main__":
    rule = "-" * 30
    print("\n" + rule + " App Starting " + rule)

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if not space_host_startup:
        print("ℹ️ No SPACE_HOST found.")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL: https://{space_host_startup}.hf.space")

    if not space_id_startup:
        print("ℹ️ No SPACE_ID found.")
    else:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")

    print("Launching Gradio Interface...")
    demo.launch(debug=True, share=False)