Commit 586cfcf (parent: ccc745c): remove final answer
agent.py
CHANGED

@@ -19,7 +19,6 @@ from supabase.client import Client, create_client
 load_dotenv()
 
 # === Tools ===
-
 @tool
 def multiply(a: int, b: int) -> int:
     """Multiply two integers."""
@@ -72,7 +71,7 @@ sys_msg = SystemMessage(content=system_prompt)
 
 # === Embeddings and Vector Store ===
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
-supabase
+supabase = create_client(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY"))
 vector_store = SupabaseVectorStore(
     client=supabase,
     embedding=embeddings,
@@ -83,7 +82,7 @@ vector_store = SupabaseVectorStore(
 # === Tools ===
 tools = [multiply, add, subtract, divide, modulus, wiki_search, web_search, arvix_search]
 
-# ===
+# === Graph Builder ===
 def build_graph(provider: str = "groq"):
     if provider == "google":
         llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
@@ -104,6 +103,7 @@ def build_graph(provider: str = "groq"):
     def assistant(state: MessagesState):
         response = llm_with_tools.invoke(state["messages"])
         content = response.content.strip()
+        # Extract exact match content, remove FINAL ANSWER: if present
         if "FINAL ANSWER:" in content:
             content = content.split("FINAL ANSWER:")[-1].strip()
         return {"messages": [AIMessage(content=content)]}
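For review, a standalone sketch of the stripping behavior the assistant node now applies; the function name and sample strings here are illustrative, not part of the commit:

def strip_final_answer(content: str) -> str:
    # Mirror of the assistant node's logic: keep only the text
    # after the last "FINAL ANSWER:" marker, if one is present.
    content = content.strip()
    if "FINAL ANSWER:" in content:
        content = content.split("FINAL ANSWER:")[-1].strip()
    return content

assert strip_final_answer("Reasoning...\nFINAL ANSWER: 42") == "42"
assert strip_final_answer("a reply with no marker") == "a reply with no marker"

Because split("FINAL ANSWER:")[-1] takes the segment after the last occurrence of the marker, a model that repeats the marker still yields only the final answer text.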
app.py
CHANGED

@@ -20,25 +20,20 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 cached_answers = []
 
+def parse_final_answer(raw: str) -> str:
+    raw = raw.strip()
+    if "FINAL ANSWER:" in raw:
+        return raw.split("FINAL ANSWER:")[-1].strip()
+    return raw.split("Final Answer:")[-1].strip() if "Final Answer:" in raw else raw
+
 class BasicAgent:
-    """A langgraph agent."""
     def __init__(self):
-        print("BasicAgent initialized.")
         self.graph = build_graph()
 
     def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
         messages = [HumanMessage(content=question)]
-
-
-
-        # Enforce strict FINAL ANSWER format parsing
-        match = re.search(r"FINAL ANSWER:\s*(.+)", full_output, re.IGNORECASE)
-        if match:
-            return match.group(0).strip()  # Returns the entire "FINAL ANSWER: xxx"
-        else:
-            print(" FINAL ANSWER not found in output, returning fallback.")
-            return "FINAL ANSWER: unknown"
+        output = self.graph.invoke({"messages": messages})
+        return parse_final_answer(output['messages'][-1].content)
 
 def run_agent_only(profile: gr.OAuthProfile | None):
     global cached_answers
@@ -53,86 +48,66 @@ def run_agent_only(profile: gr.OAuthProfile | None):
     except Exception as e:
         return f"Agent Init Error: {e}", None
 
-    questions_url = f"{DEFAULT_API_URL}/questions"
-
     try:
-
-        questions_data = response.json()
+        questions = requests.get("https://agents-course-unit4-scoring.hf.space/questions", timeout=15).json()
     except Exception as e:
         return f"Error fetching questions: {e}", None
 
-    with open("system_prompt.txt", "r"
+    with open("system_prompt.txt", "r") as f:
         system_prompt = f.read().strip()
 
-    for item in
+    for item in questions:
         task_id = item.get("task_id")
         question = item.get("question")
         file_name = item.get("file_name")
 
-        if not task_id or question
+        if not task_id or not question:
             continue
 
         try:
-            user_message = question
-
-            user_message += f"\n\nFile to use: {file_name}"
-
-            full_input = system_prompt + "\n\n" + user_message
-            answer = agent(full_input)
+            user_message = question + (f"\n\nFile to use: {file_name}" if file_name else "")
+            answer = agent(system_prompt + "\n\n" + user_message)
             cached_answers.append({"task_id": task_id, "submitted_answer": answer})
             results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
         except Exception as e:
             results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"AGENT ERROR: {e}"})
 
-    return "Agent finished.
+    return "Agent finished. Click 'Submit Cached Answers' next.", pd.DataFrame(results_log)
 
 def submit_cached_answers(profile: gr.OAuthProfile | None):
-    global cached_answers
     if not profile or not cached_answers:
-        return "
-
-    space_id = os.getenv("SPACE_ID")
-    username = profile.username
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+        return "Nothing to submit. Run the agent first.", None
 
     payload = {
-        "username": username,
-        "agent_code":
+        "username": profile.username,
+        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
         "answers": cached_answers
     }
 
-    submit_url = f"{DEFAULT_API_URL}/submit"
-
     try:
-        response = requests.post(
+        response = requests.post("https://agents-course-unit4-scoring.hf.space/submit", json=payload, timeout=60)
         result = response.json()
-
-
-
-        )
-        return final_status, None
+        score = result.get("score", "?")
+        correct = result.get("correct_count", "?")
+        total = result.get("total_attempted", "?")
+        return f"Submission complete. Score: {score}% ({correct}/{total})", None
     except Exception as e:
         return f"Submission failed: {e}", None
 
-# --- Gradio UI ---
 with gr.Blocks() as demo:
-    gr.Markdown("#
-
-
-
-    2. Then click 'Submit Cached Answers' to submit them for scoring.
-    """)
+    gr.Markdown("""# Agent Evaluator
+    1. Login with Hugging Face
+    2. Run agent only
+    3. Submit answers""")
 
     gr.LoginButton()
-    run_button
-    submit_button
-
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-
-    run_button.click(fn=run_agent_only, outputs=[status_output, results_table])
-    submit_button.click(fn=submit_cached_answers, outputs=[status_output, results_table])
+    run_button = gr.Button("Run Agent")
+    submit_button = gr.Button("Submit Cached Answers")
+    status_box = gr.Textbox(label="Status", lines=4)
+    table = gr.DataFrame(label="Answers Log")
 
+    run_button.click(fn=run_agent_only, outputs=[status_box, table])
+    submit_button.click(fn=submit_cached_answers, outputs=[status_box, table])
 
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)