Adding audio tools with whisper
Files changed:
- agents/llama_index_agent.py +6 -4
- app.py +62 -10
- tools/text_tools.py +1 -0
agents/llama_index_agent.py (CHANGED)

@@ -186,10 +186,12 @@ class GaiaAgent(ReActAgent):
 
 ## HANDLING AUDIO TASKS
 When dealing with audio files:
-1.
-2.
-3.
-4.
+1. Check if an audio file path is available in the context's "audio_file_path" field
+2. Always use the transcribe_audio tool with the exact file path provided in the context
+3. Extract the specific information requested from the transcript (e.g., ingredients, page numbers, names)
+4. Follow any special formatting instructions (e.g., comma-separated list, alphabetical order)
+5. Make sure to provide exactly what is asked for (e.g., "only list ingredients, not measurements")
+6. For audio tasks, ensure you've captured all relevant spoken content, including names, facts, or quotes as needed
 
 ## DELEGATION TO WRITER AGENT
 After completing your analysis, ALWAYS delegate the final answer preparation to the writer_agent with:
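The new prompt instructions refer to a transcribe_audio tool, which is not included in this diff. Assuming it follows the same FunctionTool pattern used in tools/text_tools.py and wraps the open-source whisper package (per the commit title), a minimal sketch could look like the following; the module name tools/audio_tools.py and the "base" model size are assumptions, not part of this commit:

# tools/audio_tools.py (hypothetical sketch, not part of this commit)
import whisper
from llama_index.core.tools import FunctionTool

# Load the whisper model once at import time; "base" is an assumed model size.
_whisper_model = whisper.load_model("base")

def transcribe_audio(file_path: str) -> str:
    """Transcribe the audio file at file_path and return the spoken text."""
    result = _whisper_model.transcribe(file_path)
    return result["text"]

transcribe_audio_tool = FunctionTool.from_defaults(
    fn=transcribe_audio,
    name="transcribe_audio",
    description="Transcribes an audio file at the given local path and returns the text.",
)

Such a tool would then be registered with GaiaAgent alongside the existing text tools; running whisper locally also requires the openai-whisper package and ffmpeg to be installed.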
app.py (CHANGED)

@@ -90,23 +90,49 @@ class BasicAgent:
         else:
             print(f"Writer agent using same model as main agent")
 
-    def __call__(self,
+    def __call__(self, question_data: dict) -> str:
         """Process a GAIA benchmark question and return the formatted answer."""
-
+        # Extract question text and task_id
+        question_text = question_data.get("question", "")
+        task_id = question_data.get("task_id", "")
+        file_name = question_data.get("file_name", "")
+
+        print(f"Agent received question (first 50 chars): {question_text[:50]}...")
+
+        # Download audio file if present
+        local_file_path = None
+        if file_name and task_id:
+            try:
+                local_file_path = self.download_task_file(task_id)
+                print(f"Downloaded audio file to {local_file_path}")
+            except Exception as e:
+                print(f"Error downloading audio file: {e}")
 
         async def agentic_main():
-            # Initialize context with the question
+            # Initialize context with the question and file path
            initial_state = {
-                "original_question":
+                "original_question": question_text,
+                "task_id": task_id,
+                "audio_file_path": local_file_path,
                "analysis_notes": "",
                "format_requirements": "",
                "next_agent": "",
                "final_answer": ""
            }
 
-            #
+            # MODIFY THIS PART - Instead of just passing the question text,
+            # create a more detailed input that includes the audio file path information
+            enhanced_input = f"""Task ID: {task_id}
+Question: {question_text}
+
+"""
+            # Add audio file information if available
+            if local_file_path:
+                enhanced_input += f"Audio File Path: {local_file_path}\n\nPlease analyze this question. If it involves an audio file, use the transcribe_audio tool with the provided path."
+
+            # Use the workflow to process the question with enhanced input
            workflow_response = await self.agent_workflow.run(
-
+                enhanced_input,
                initial_state=initial_state
            )
            return workflow_response

@@ -117,7 +143,32 @@ class BasicAgent:
         final_answer = response.response.blocks[-1].text
         print(f"Agent returning answer: {final_answer}")
         return final_answer
-
+
+    def download_task_file(self, task_id: str) -> str:
+        """Download a task file from the API and return the local file path."""
+        api_url = DEFAULT_API_URL
+        file_url = f"{api_url}/files/{task_id}"
+
+        print(f"Downloading file from: {file_url}")
+
+        try:
+            response = requests.get(file_url, stream=True)
+            response.raise_for_status()
+
+            # Create a directory for downloaded files if it doesn't exist
+            downloads_dir = Path("downloads")
+            downloads_dir.mkdir(exist_ok=True)
+
+            # Save the file to the downloads directory
+            file_path = downloads_dir / f"{task_id}.mp3"
+            with open(file_path, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+
+            return str(file_path)
+        except Exception as e:
+            print(f"Error downloading file: {e}")
+            raise
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

@@ -180,12 +231,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-
+            # Pass the entire item instead of just the question text
+            submitted_answer = agent(item)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
-
-
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
    if not answers_payload:
        print("Agent did not produce any answers to submit.")
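With this change, BasicAgent is called with the full task dictionary rather than the bare question string, so the file download and the enhanced prompt are built from the same item. A rough usage sketch, assuming BasicAgent can be constructed with its defaults and using placeholder task values:

# Hypothetical task item in the shape returned by the questions endpoint.
agent = BasicAgent()
item = {
    "task_id": "example-task-id",  # placeholder id, used to build the /files/<task_id> URL
    "question": "List the ingredients mentioned in the recording.",
    "file_name": "recording.mp3",  # a non-empty file_name triggers download_task_file
}

# __call__ downloads downloads/<task_id>.mp3, builds the enhanced input containing the
# audio file path, runs the agent workflow, and returns the final answer text.
answer = agent(item)
print(answer)

Note that download_task_file always saves the payload as <task_id>.mp3, which fits the audio-only scope of this commit but would mislabel other file types.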
tools/text_tools.py (CHANGED)

@@ -11,3 +11,4 @@ reverse_text_tool = FunctionTool.from_defaults(
     name="reverse_text_tool",
     description="It returns the reversed string of text in the input.",
 )
+