Final_Assignment_1

Running

App Files Community

Jofthomas committed 6 days ago

Commit

31243f4

verified ·

1 Parent(s): e80aab9

Update app.py

Browse files

Files changed (1) hide show

app.py +131 -384

app.py CHANGED Viewed

@@ -2,452 +2,199 @@ import os
 import gradio as gr
 import requests
 import inspect # To get source code for __repr__
-import asyncio
-from typing import Dict, List, AsyncGenerator, Union, Tuple, Optional
-# --- LangChain Specific Imports ---
-from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
-from langchain_core.tools import tool
-from langchain_openai import ChatOpenAI
-from langgraph.checkpoint.memory import MemorySaver
-from langgraph.prebuilt import create_react_agent
 # --- Constants ---
 DEFAULT_API_URL = "http://127.0.0.1:8000" # Default URL for your FastAPI app
-# --- Tools (Keep these defined globally or ensure they are included in __repr__) ---
-@tool
-def get_lat_lng(location_description: str) -> dict[str, float]:
-    """Get the latitude and longitude of a location."""
-    print(f"Tool: Getting lat/lng for {location_description}")
-    # Replace with actual API call in a real app
-    if "tokyo" in location_description.lower():
-        return {"lat": 35.6895, "lng": 139.6917}
-    elif "paris" in location_description.lower():
-         return {"lat": 48.8566, "lng": 2.3522}
-    elif "new york" in location_description.lower():
-        return {"lat": 40.7128, "lng": -74.0060}
-    else:
-        return {"lat": 51.5072, "lng": -0.1276} # Default London
-@tool
-def get_weather(lat: float, lng: float) -> dict[str, str]:
-    """Get the weather at a location."""
-    print(f"Tool: Getting weather for lat={lat}, lng={lng}")
-    # Replace with actual API call in a real app
-    if lat > 45: # Northern locations
-        return {"temperature": "15°C", "description": "Cloudy"}
-    elif lat > 30: # Mid locations
-        return {"temperature": "25°C", "description": "Sunny"}
-    else: # Southern locations
-        return {"temperature": "30°C", "description": "Very Sunny"}
-# --- Agent Class Definition ---
-class MyLangChainAgent:
     """
-    A sample LangChain agent class designed for interaction and submission.
-    NOTE: The current tools (weather/location) are placeholders and WILL NOT
-          correctly answer GAIA benchmark questions. This class structure
-          demonstrates how to integrate an agent with the submission API.
-          Replace LLM, tools, and potentially the agent type for actual GAIA tasks.
     """
-    def __init__(self, model_name="gpt-4", temperature=0):
-        # Ensure API key is available
-        if not os.getenv("OPENAI_API_KEY"):
-            raise ValueError("OPENAI_API_KEY environment variable not set.")
-        self.llm = ChatOpenAI(temperature=temperature, model=model_name)
-        self.tools = [get_lat_lng, get_weather] # Use the globally defined tools
-        self.memory = MemorySaver()
-        # Create the agent executor
-        self.agent_executor = create_react_agent(self.llm, self.tools, checkpointer=self.memory)
-        print("MyLangChainAgent initialized.")
-    async def __call__(self, question: str, thread_id: str) -> AsyncGenerator[Union[str, Dict[str, str]], str]:
         """
-        Runs the agent asynchronously, yielding intermediate steps and returning the final answer.
-        Args:
-            question: The input question string.
-            thread_id: A unique identifier for the conversation thread.
-        Yields:
-            Intermediate steps (tool calls/results) as strings or dicts.
-        Returns:
-            The final AI answer as a string.
         """
-        print(f"Agent executing for thread_id: {thread_id} on question: {question[:50]}...")
-        lc_messages: List[BaseMessage] = [HumanMessage(content=question)]
-        final_answer = ""
-        full_response_content = "" # Store the complete AI response chunks
-        async for chunk in self.agent_executor.astream_events(
-            {"messages": lc_messages},
-            config={"configurable": {"thread_id": thread_id}},
-            version="v1"
-        ):
-            event = chunk["event"]
-            data = chunk["data"]
-            # print(f"DEBUG: Event: {event}, Data Keys: {data.keys()}") # Debugging line
-            if event == "on_chat_model_stream":
-                content = data["chunk"].content
-                if content:
-                    # print(f"DEBUG: AI Chunk: {content}") # Debugging line
-                    full_response_content += content
-                    # Yield potentially incomplete response for live typing effect if needed
-                    # yield {"type": "stream", "content": content }
-            elif event == "on_tool_start":
-                tool_input_str = str(data.get('input', ''))
-                yield f"🛠️ Using tool: **{data['name']}** with input: `{tool_input_str}`"
-            elif event == "on_tool_end":
-                 tool_output_str = str(data.get('output', ''))
-                 yield f"✅ Tool **{data['name']}** finished.\nResult: `{tool_output_str}`"
-            # Detect the end of the conversation turn (heuristic)
-            # The 'on_chain_end' event for the top-level graph might signal the end.
-            # Or check the 'messages' list in the final state if available.
-            # For create_react_agent, the final AIMessage is often the last main event.
-            # We will capture the last full AI message content after the loop.
-        # After iterating through all chunks, the final answer should be in full_response_content
-        final_answer = full_response_content.strip()
-        print(f"Agent execution finished. Final Answer: {final_answer[:100]}...")
-        # Yield the complete final answer distinctly if needed
-        # yield {"type": "final_answer_marker", "content": final_answer} # Example marker
-        return final_answer # Return the final answer
     def __repr__(self) -> str:
         """
-        Return the source code required to reconstruct this agent, including
-        the class definition, tool functions, and necessary imports.
         """
         imports = [
-            "import os",
-            "from typing import Dict, List, AsyncGenerator, Union, Tuple, Optional",
-            "from langchain_core.messages import HumanMessage, AIMessage, BaseMessage",
-            "from langchain_core.tools import tool",
-            "from langchain_openai import ChatOpenAI",
-            "from langgraph.checkpoint.memory import MemorySaver",
-            "from langgraph.prebuilt import create_react_agent",
-            "import inspect", # Needed if repr itself uses inspect dynamically
-            "import asyncio", # Needed for async call
-            "\n"
         ]
-        # Get source code of tool functions
-        tool_sources = []
-        for t in self.tools:
-             try:
-                 tool_sources.append(inspect.getsource(t))
-             except (TypeError, OSError) as e:
-                  print(f"Warning: Could not get source for tool {t.__name__}: {e}")
-                  tool_sources.append(f"# Could not automatically get source for tool: {t.__name__}\n")
-        # Get source code of the class itself
-        class_source = inspect.getsource(MyLangChainAgent)
-        # Combine imports, tools, and class definition
-        full_source = "\n".join(imports) + "\n\n" + \
-                      "\n\n".join(tool_sources) + "\n\n" + \
-                      class_source
         return full_source
 # --- Gradio UI and Logic ---
-# Initialize the agent (do this once outside the request functions)
-# Handle potential API key error during initialization
-try:
-    agent_instance = MyLangChainAgent()
-except ValueError as e:
-    print(f"ERROR initializing agent: {e}")
-    # Provide a dummy agent or exit if critical
-    agent_instance = None # Or raise SystemExit("Agent initialization failed")
-def format_chat_history(history: List[List[Optional[str]]]) -> List[Tuple[Optional[str], Optional[str]]]:
-    """Helper to format Gradio history for display."""
-    # Gradio's history format is List[List[user_msg | None, ai_msg | None]]
-    # We want List[Tuple[user_msg | None, ai_msg | None]] for Chatbot
-    formatted = []
-    for turn in history:
-        formatted.append(tuple(turn))
-    return formatted
-async def fetch_and_display_question(api_url: str):
-    """Calls the backend to get a random question."""
-    if not api_url:
-        return "Please enter the API URL.", "", "", gr.update(value=""), gr.update(value="") # Clear chat too
-    question_url = f"{api_url.strip('/')}/random-question"
-    print(f"Fetching question from: {question_url}")
     try:
-        response = requests.get(question_url, timeout=10)
-        response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
-        data = response.json()
-        task_id = data.get("task_id")
-        question_text = data.get("question")
-        if task_id and question_text:
-            print(f"Fetched Task ID: {task_id}")
-            # Return updates for Gradio components: Status, Task ID, Question Text, Clear Agent Answer, Clear Chat
-            return "Question fetched successfully!", task_id, question_text, "", [] # Clears answer and chat history
-        else:
-            return "Error: Invalid data format received from API.", "", "", "", []
     except requests.exceptions.RequestException as e:
-        print(f"Error fetching question: {e}")
-        return f"Error fetching question: {e}", "", "", "", []
     except Exception as e:
-        print(f"An unexpected error occurred: {e}")
-        return f"An unexpected error occurred: {e}", "", "", "", []
-async def run_agent_interaction(
-    message: str,
-    history: List[List[Optional[str]]],
-    current_task_id: str,
-    # agent_instance: MyLangChainAgent # Agent passed via state potentially
-):
-    """Handles the chat interaction, runs the agent, yields steps, updates final answer state."""
-    if agent_instance is None:
-         yield "Agent not initialized. Please check API keys and restart."
-         return
-    if not current_task_id:
-        yield "Please fetch a question first using the button above."
-        return
-    # The 'message' here is the user's latest input in the chat.
-    # For this workflow, we assume the main input is the fetched question.
-    # We'll use the fetched question (implicitly stored) to run the agent.
-    # If you want interactive chat *about* the question, the logic needs adjustment.
-    # For simplicity, let's assume the user's message *is* the question or a prompt related to it.
-    # In the GAIA context, usually, the agent just runs on the provided question directly.
-    # We'll use the `current_task_id` to generate a unique thread_id for LangGraph memory.
-    thread_id = f"gaia_task_{current_task_id}_{os.urandom(4).hex()}"
-    print(f"Running agent for user message: {message[:50]}...")
-    history.append([message, None]) # Add user message to history
-    final_agent_answer = None
-    full_yielded_response = ""
-    # Use the agent's __call__ method
-    async for step in agent_instance(message, thread_id=thread_id):
-        if isinstance(step, str):
-            # Intermediate step (tool call, result, maybe stream chunk)
-            history[-1][1] = step # Update the AI's response in the last turn
-            yield format_chat_history(history) # Update chatbot UI
-            full_yielded_response = step # Track last yielded message
-        # If __call__ yielded dicts for streaming, handle here:
-        # elif isinstance(step, dict) and step.get("type") == "stream":
-        #     history[-1][1] = (history[-1][1] or "") + step["content"]
-        #     yield format_chat_history(history)
-    # After the loop, the `step` variable holds the return value (final answer)
-    final_agent_answer = step
-    print(f"Agent final answer received: {final_agent_answer[:100]}...")
-    # Update the history with the definitive final answer
-    if final_agent_answer:
-        history[-1][1] = final_agent_answer # Replace intermediate steps with final one
-    elif full_yielded_response:
-        # Fallback if final answer wasn't returned correctly but we yielded something
-         history[-1][1] = full_yielded_response
-         final_agent_answer = full_yielded_response # Use the last yielded message as answer
-    else:
-         history[-1][1] = "Agent did not produce a final answer."
-         final_agent_answer = "" # Ensure it's a string
-    # Yield the final state of the history and update the hidden state for the final answer
-    yield format_chat_history(history), final_agent_answer
-def submit_to_leaderboard(
-    api_url: str,
-    username: str,
-    task_id: str,
-    agent_answer: str,
-    # agent_instance: MyLangChainAgent # Pass agent via state if needed
-):
-    """Submits the agent's answer and code to the FastAPI backend."""
-    if agent_instance is None:
-        return "Agent not initialized. Cannot submit."
-    if not api_url:
-        return "Please enter the API URL."
-    if not username:
-        return "Please enter your Hugging Face username."
-    if not task_id:
-        return "No task ID available. Please fetch a question first."
-    if agent_answer is None or agent_answer.strip() == "": # Check if None or empty
-        # Maybe allow submission of empty answer? Depends on requirements.
-        print("Warning: Submitting empty answer.")
-        # return "Agent has not provided an answer yet."
-    submit_url = f"{api_url.strip('/')}/submit"
-    print(f"Submitting to: {submit_url}")
-    # Get agent code
-    try:
-        agent_code = agent_instance.__repr__()
-        # print(f"Agent Code (first 200 chars):\n{agent_code[:200]}...") # Debug
-    except Exception as e:
-        print(f"Error getting agent representation: {e}")
-        return f"Error generating agent code for submission: {e}"
-    # Prepare submission data according to Pydantic model in FastAPI
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
-        "answers": [
-            {
-                "task_id": task_id,
-                "submitted_answer": agent_answer # Use the stored final answer
-            }
-            # Add more answers here if submitting a batch
-        ]
     }
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=30)
         response.raise_for_status()
         result_data = response.json()
-        # Format the result nicely for display
-        result_message = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
-            f"Score: {result_data.get('score')}\n"
-            f"Correct: {result_data.get('correct_count')}/{result_data.get('total_attempted')}\n"
-            f"Message: {result_data.get('message')}\n"
-            f"Timestamp: {result_data.get('timestamp')}"
         )
         print("Submission successful.")
-        return result_message
     except requests.exceptions.HTTPError as e:
-        # Try to get detail from response body if available
         error_detail = e.response.text
         try:
             error_json = e.response.json()
             error_detail = error_json.get('detail', error_detail)
         except requests.exceptions.JSONDecodeError:
-            pass # Keep the raw text if not JSON
-        print(f"HTTP Error during submission: {e.response.status_code} - {error_detail}")
-        return f"Submission Failed (HTTP {e.response.status_code}): {error_detail}"
     except requests.exceptions.RequestException as e:
-        print(f"Network error during submission: {e}")
-        return f"Submission Failed: Network error - {e}"
     except Exception as e:
-        print(f"An unexpected error occurred during submission: {e}")
-        return f"Submission Failed: An unexpected error occurred - {e}"
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Agent Evaluation Interface")
     gr.Markdown(
-        "Fetch a random question from the evaluation API, interact with the agent "
-        "(Note: the default agent answers weather questions, not GAIA), "
-        "and submit the agent's final answer to the leaderboard."
     )
-    # --- State Variables ---
-    # Store current task info, agent's final answer, and the agent instance
-    current_task_id = gr.State("")
-    current_question_text = gr.State("")
-    current_agent_answer = gr.State("") # Stores the final answer string from the agent
-    # agent_state = gr.State(agent_instance) # Pass agent instance via state
     with gr.Row():
         api_url_input = gr.Textbox(label="FastAPI API URL", value=DEFAULT_API_URL)
         hf_username_input = gr.Textbox(label="Hugging Face Username")
-    with gr.Row():
-        fetch_button = gr.Button("Get Random Question")
-        submission_status_display = gr.Textbox(label="Status", interactive=False) # For fetch status
-    with gr.Row():
-        question_display = gr.Textbox(label="Current Question", lines=3, interactive=False)
-    gr.Markdown("---")
-    gr.Markdown("## Agent Interaction")
-    chatbot = gr.Chatbot(label="Agent Conversation", height=400)
-    msg_input = gr.Textbox(label="Send a message to the Agent (or just observe)") # Input for chat
-    # Hidden Textbox to display the final extracted answer (optional, for clarity)
-    final_answer_display = gr.Textbox(label="Agent's Final Answer (Extracted)", interactive=False)
-    gr.Markdown("---")
-    gr.Markdown("## Submission")
-    with gr.Row():
-         submit_button = gr.Button("Submit Current Answer to Leaderboard")
-    submission_result_display = gr.Markdown(label="Submission Result", value="*Submit an answer to see the result here.*") # Use Markdown for better formatting
-    # --- Component Interactions ---
-    # Fetch Button Action
-    fetch_button.click(
-        fn=fetch_and_display_question,
-        inputs=[api_url_input],
-        outputs=[
-            submission_status_display, # Shows fetch status
-            current_task_id,           # Updates hidden state
-            question_display,          # Updates question text box
-            final_answer_display,      # Clears old final answer
-            chatbot                    # Clears chat history
-        ]
     )
-    # Chat Submission Action (when user sends message in chat)
-    msg_input.submit(
-        fn=run_agent_interaction,
-        inputs=[
-            msg_input,                  # User message from chat input
-            chatbot,                    # Current chat history
-            current_task_id,            # Current task ID from state
-            # agent_state               # Pass agent instance state
-        ],
-        outputs=[
-            chatbot,                    # Updated chat history
-            current_agent_answer        # Update the hidden state holding the final answer
-        ]
-    ).then(
-        # After agent runs, update the visible "Final Answer" box from the state
-        lambda answer_state: answer_state,
-        inputs=[current_agent_answer],
-        outputs=[final_answer_display]
-    )
-    # Clear message input after submission
-    msg_input.submit(lambda: "", None, msg_input, queue=False)
-    # Submit Button Action
-    submit_button.click(
-        fn=submit_to_leaderboard,
-        inputs=[
-            api_url_input,
-            hf_username_input,
-            current_task_id,
-            current_agent_answer,       # Use the stored final answer state
-            # agent_state               # Pass agent instance state
-        ],
-        outputs=[submission_result_display] # Display result message
-    )
 if __name__ == "__main__":
-    if agent_instance is None:
-        print("\nFATAL: Agent could not be initialized. Gradio app will not run correctly.")
-        print("Please ensure OPENAI_API_KEY is set and valid.\n")
-        # Optionally exit here if agent is critical
-        # exit(1)
-    else:
-        print("Launching Gradio Interface...")
-        demo.launch(debug=True, server_name="0.0.0.0") # Share=False by default for security

 import gradio as gr
 import requests
 import inspect # To get source code for __repr__
+import pandas as pd # For displaying results in a table
 # --- Constants ---
 DEFAULT_API_URL = "http://127.0.0.1:8000" # Default URL for your FastAPI app
+# --- Basic Agent Definition ---
+class BasicAgent:
     """
+    A very simple agent placeholder.
+    It just returns a fixed string for any question.
     """
+    def __init__(self):
+        print("BasicAgent initialized.")
+        # Add any setup if needed
+    def __call__(self, question: str) -> str:
         """
+        The agent's logic to answer a question.
+        This basic version ignores the question content.
         """
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        # Replace this with actual logic if you were building a real agent
+        fixed_answer = "This is a default answer."
+        print(f"Agent returning fixed answer: {fixed_answer}")
+        return fixed_answer
     def __repr__(self) -> str:
         """
+        Return the source code required to reconstruct this agent.
         """
         imports = [
+            "import inspect\n" # May not be strictly needed by the agent logic itself
         ]
+        class_source = inspect.getsource(BasicAgent)
+        full_source = "\n".join(imports) + "\n" + class_source
         return full_source
 # --- Gradio UI and Logic ---
+def run_and_submit_all(api_url: str, username: str):
+    """
+    Fetches all questions, runs the BasicAgent on them, submits all answers,
+    and displays the results.
+    """
+    if not api_url:
+        return "Please enter the API URL.", None # Status, DataFrame
+    if not username:
+        return "Please enter your Hugging Face username.", None # Status, DataFrame
+    api_url = api_url.strip('/')
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+    # 1. Instantiate the Agent
+    try:
+        agent = BasicAgent()
+        agent_code = agent.__repr__()
+        # print(f"Agent Code (first 200): {agent_code[:200]}...") # Debug
+    except Exception as e:
+        print(f"Error instantiating agent or getting repr: {e}")
+        return f"Error initializing agent: {e}", None
+    # 2. Fetch All Questions
+    print(f"Fetching questions from: {questions_url}")
     try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+             return "Fetched questions list is empty.", None
+        print(f"Fetched {len(questions_data)} questions.")
+        status_update = f"Fetched {len(questions_data)} questions. Running agent..."
+        # NOTE: this function returns once at the end and never yields, so this
+        # intermediate status is not shown in the UI.
     except requests.exceptions.RequestException as e:
+        print(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
     except Exception as e:
+        print(f"An unexpected error occurred fetching questions: {e}")
+        return f"An unexpected error occurred fetching questions: {e}", None
+    # 3. Run Agent on Each Question
+    results_log = [] # To store data for the results table
+    answers_payload = [] # To store data for the submission API
+    for item in questions_data:
+        task_id = item.get("task_id")
+        question_text = item.get("question")
+        if not task_id or question_text is None:
+            print(f"Skipping item with missing task_id or question: {item}")
+            continue
+        try:
+            submitted_answer = agent(question_text) # Call the agent's logic
+            answers_payload.append({
+                "task_id": task_id,
+                "submitted_answer": submitted_answer
+            })
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text,
+                "Submitted Answer": submitted_answer
+            })
+        except Exception as e:
+             print(f"Error running agent on task {task_id}: {e}")
+             # Policy choice for agent errors (skip vs. submit a default answer):
+             # here the error is recorded in the results table and the task is
+             # left out of the submission payload.
+             results_log.append({
+                "Task ID": task_id,
+                "Question": question_text,
+                "Submitted Answer": f"AGENT ERROR: {e}"
+            })
+    if not answers_payload:
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
+        "answers": answers_payload
     }
+    status_update = f"Agent finished. Submitting {len(answers_payload)} answers..."
+    print(status_update)
+    # 5. Submit to Leaderboard
+    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=45) # Increased timeout
         response.raise_for_status()
         result_data = response.json()
+        # Prepare final status message and results table
+        final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score')}% "
+            f"({result_data.get('correct_count')}/{result_data.get('total_attempted')} correct)\n"
+            f"Message: {result_data.get('message')}"
         )
         print("Submission successful.")
+        results_df = pd.DataFrame(results_log)
+        return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = e.response.text
         try:
             error_json = e.response.json()
             error_detail = error_json.get('detail', error_detail)
         except requests.exceptions.JSONDecodeError:
+            pass
+        status_message = f"Submission Failed (HTTP {e.response.status_code}): {error_detail}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log) # Show attempts even if submission failed
+        return status_message, results_df
     except requests.exceptions.RequestException as e:
+        status_message = f"Submission Failed: Network error - {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
     except Exception as e:
+        status_message = f"An unexpected error occurred during submission: {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
+    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
+        "Enter the API URL and your username, then click Run. "
+        "This will fetch all questions, run the *very basic* agent on them, "
+        "submit all answers at once, and display the results."
     )
     with gr.Row():
         api_url_input = gr.Textbox(label="FastAPI API URL", value=DEFAULT_API_URL)
         hf_username_input = gr.Textbox(label="Hugging Face Username")
+    run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=4, interactive=False)
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    # --- Component Interaction ---
+    run_button.click(
+        fn=run_and_submit_all,
+        inputs=[api_url_input, hf_username_input],
+        outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
+    print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True)