agents_course_final_assignement

Paused

App Files Files Community

drAbreu committed 9 days ago

Commit

f0544fd

1 Parent(s): 09f2a63

Adding audio tools with whisper

Browse files

Files changed (3) hide show

agents/llama_index_agent.py +5 -1
requirements.txt +2 -1
tools/multimedia_tools.py +114 -0

agents/llama_index_agent.py CHANGED Viewed

@@ -7,6 +7,8 @@ import os
 from typing import Optional, List, Any, Dict
 from llama_index.llms.openai import OpenAI
 from llama_index.llms.anthropic import Anthropic
 from tools.web_tools import (
     tavily_tool,
@@ -58,7 +60,9 @@ class GaiaAgent(ReActAgent):
                 reverse_text_tool,
                 wikipedia_tool.load_data,
                 wikipedia_tool.search_data,
-                tavily_tool.search
                 ]
         # Use default system prompt if not provided

 from typing import Optional, List, Any, Dict
 from llama_index.llms.openai import OpenAI
 from llama_index.llms.anthropic import Anthropic
+# Audio tools (Whisper transcription and file metadata) added to the GaiaAgent tool list
+from tools.multimedia_tools import transcribe_audio, get_audio_metadata
 from tools.web_tools import (
     tavily_tool,
                 reverse_text_tool,
                 wikipedia_tool.load_data,
                 wikipedia_tool.search_data,
+                tavily_tool.search,
+                transcribe_audio,
+                get_audio_metadata
                 ]
         # Use default system prompt if not provided

requirements.txt CHANGED Viewed

@@ -4,4 +4,5 @@ llama-index
 llama-index-tools-wikipedia
 llama-index-tools-tavily-research
 llama-index-llms-anthropic
-llama-index-llms-openai

 llama-index-tools-wikipedia
 llama-index-tools-tavily-research
 llama-index-llms-anthropic
+llama-index-llms-openai
+llama-index-readers-whisper

tools/multimedia_tools.py ADDED Viewed

	@@ -0,0 +1,114 @@

+import os
+from typing import Optional, List, Tuple
+from pathlib import Path
+from llama_index.readers.whisper import WhisperReader
+from llama_index.core.schema import Document
+class WhisperTool:
+    """Tool for transcribing audio files using OpenAI's Whisper model.
+
+    The underlying ``WhisperReader`` is created lazily, on first access to
+    ``reader``, rather than in ``__init__``. Constructing the tool (including
+    a module-level instance created at import time) therefore never touches
+    the reader; any failure to build it surfaces when a transcription is
+    requested, where it is reported as an error string.
+    """
+
+    def __init__(self, model: str = "whisper-1", api_key: Optional[str] = None):
+        """
+        Initialize the WhisperTool.
+        Args:
+            model: The Whisper model to use
+            api_key: OpenAI API key (defaults to OPENAI_API_KEY environment variable)
+        """
+        self._model = model
+        self._api_key = api_key
+        # Built on demand by the ``reader`` property.
+        self._reader = None
+
+    @property
+    def reader(self) -> "WhisperReader":
+        """Return the WhisperReader, building it on first access.
+
+        The OPENAI_API_KEY environment variable is consulted at this point
+        (not at construction time), and only when no explicit key was given.
+        """
+        if self._reader is None:
+            self._reader = WhisperReader(
+                model=self._model,
+                api_key=self._api_key or os.getenv("OPENAI_API_KEY"),
+            )
+        return self._reader
+
+    @reader.setter
+    def reader(self, value) -> None:
+        """Allow callers to assign ``reader`` directly, as with a plain attribute."""
+        self._reader = value
+
+    @staticmethod
+    def _join_documents(documents) -> str:
+        """Combine the ``text`` of each document, or report that there were none."""
+        if not documents:
+            return "No transcription was generated."
+        return "\n\n".join(doc.text for doc in documents)
+
+    def transcribe(self, audio_path: str) -> str:
+        """
+        Transcribe an audio file to text.
+        Args:
+            audio_path: Path to the audio file
+        Returns:
+            Transcribed text content, or a message starting with
+            "Error transcribing audio:" if anything went wrong
+        """
+        try:
+            return self._join_documents(self.reader.load_data(audio_path))
+        except Exception as e:
+            # Deliberately broad: failures are returned as text instead of
+            # propagating to the caller.
+            return f"Error transcribing audio: {str(e)}"
+
+    async def transcribe_async(self, audio_path: str) -> str:
+        """
+        Transcribe an audio file to text asynchronously.
+        Args:
+            audio_path: Path to the audio file
+        Returns:
+            Transcribed text content, or a message starting with
+            "Error transcribing audio:" if anything went wrong
+        """
+        try:
+            return self._join_documents(await self.reader.aload_data(audio_path))
+        except Exception as e:
+            # Same best-effort contract as ``transcribe``.
+            return f"Error transcribing audio: {str(e)}"
+
+    def get_metadata(self, audio_path: str) -> dict:
+        """
+        Get metadata about an audio file.
+        Args:
+            audio_path: Path to the audio file
+        Returns:
+            Dictionary with "filename", "extension", "size_bytes", "exists"
+            and "is_file" ("size_bytes" and "is_file" are None when the path
+            does not exist), or {"error": <message>} on failure
+        """
+        try:
+            # Path construction is inside the try so that an invalid
+            # audio_path (e.g. None) yields the error dict instead of raising.
+            path = Path(audio_path)
+            exists = path.exists()
+            return {
+                "filename": path.name,
+                "extension": path.suffix,
+                "size_bytes": path.stat().st_size if exists else None,
+                "exists": exists,
+                "is_file": path.is_file() if exists else None,
+            }
+        except Exception as e:
+            return {"error": str(e)}
+# Module-level shared WhisperTool instance, created with the default arguments
+# when this module is imported; the tool functions below delegate to it.
+whisper_tool = WhisperTool()
+# Define tool functions that can be used directly with LlamaIndex
+def transcribe_audio(audio_path: str) -> str:
+    """
+    Produce a text transcript of an audio file via the shared ``whisper_tool``.
+    Args:
+        audio_path: Path to the audio file
+    Returns:
+        The string returned by ``whisper_tool.transcribe`` for that path
+    """
+    transcript = whisper_tool.transcribe(audio_path)
+    return transcript
+def get_audio_metadata(audio_path: str) -> dict:
+    """
+    Look up file metadata for an audio file via the shared ``whisper_tool``.
+    Args:
+        audio_path: Path to the audio file
+    Returns:
+        The dictionary returned by ``whisper_tool.get_metadata`` for that path
+    """
+    metadata = whisper_tool.get_metadata(audio_path)
+    return metadata