Final_Assignment_Project

Running

App Files Files Community

wt002 committed 5 days ago

Commit

391c163

verified ·

1 Parent(s): f008d28

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -128

app.py CHANGED Viewed

@@ -2,21 +2,14 @@ import os
 import gradio as gr
 import requests
-from typing import Union
-import os
-#from langchain.agents.agent import Agent
-#from langchain.agents.tools import Tool
-#from langchain.agents import AgentExecutor, initialize_agent
-from langchain_community.llms import Ollama
-from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
-from langchain_community.document_loaders import (
-    CSVLoader,
-    PyPDFLoader,
-    UnstructuredWordDocumentLoader
-)
-from langchain_community.utilities import TextRequestsWrapper
 import speech_recognition as sr
 from pydub import AudioSegment
 # (Keep Constants as is)
 # --- Constants ---
@@ -25,7 +18,20 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 class BasicAgent:
-    def __init__(self):
         print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
@@ -34,71 +40,44 @@ class BasicAgent:
         print(f"Agent returning answer: {fixed_answer}")
         return fixed_answer
-    def __init__(self, model_name: str = "llama3"):
-        """
-        Open-source multi-modal agent with:
-        - Web search
-        - Document processing
-        - Speech-to-text
-        - URL content fetching
-        """
-        # Initialize LLM (local via Ollama)
-        self.llm = Ollama(model=model_name, temperature=0.7)
-        # Initialize tools
-        self.search_tool = DuckDuckGoSearchRun()
-        self.wikipedia_tool = WikipediaQueryRun()
-        self.requests_tool = TextRequestsWrapper()
-        # Speech recognition
-        self.recognizer = sr.Recognizer()
-        # Initialize agent
-        self.tools = self._initialize_tools()
-        self.agent = self._create_agent()
-    def _initialize_tools(self) -> list[Tool]:
-        """Initialize all available tools"""
-        return [
-            Tool(
-                name="Web Search",
-                func=self.search_tool.run,
-                description="For current events/unknown topics"
-            ),
-            Tool(
-                name="Wikipedia",
-                func=self.wikipedia_tool.run,
-                description="For factual information"
-            ),
-            Tool(
-                name="Document Loader",
-                func=self.process_document,
-                description="Processes PDF, Word, CSV files"
-            ),
-            Tool(
-                name="Speech Transcription",
-                func=self.transcribe_audio,
-                description="Converts speech from audio files to text"
-            ),
-            Tool(
-                name="Website Content",
-                func=self.requests_tool.get,
-                description="Fetches content from URLs"
-            )
-        ]
-    def _create_agent(self) -> AgentExecutor:
-        """Create the agent executor"""
-        return initialize_agent(
-            tools=self.tools,
-            llm=self.llm,
-            agent="structured-chat-react",
-            verbose=True,
-            handle_parsing_errors=True
-        )
     def process_document(self, file_path: str) -> str:
-        """Handle different document types"""
         if not os.path.exists(file_path):
             return "File not found"
@@ -106,71 +85,57 @@ class BasicAgent:
         try:
             if ext == '.pdf':
-                loader = PyPDFLoader(file_path)
             elif ext in ('.doc', '.docx'):
-                loader = UnstructuredWordDocumentLoader(file_path)
             elif ext == '.csv':
-                loader = CSVLoader(file_path)
             else:
                 return "Unsupported file format"
-            docs = loader.load()
-            return "\n".join([doc.page_content for doc in docs])
         except Exception as e:
             return f"Error processing document: {str(e)}"
-    def _convert_audio_format(self, audio_path: str) -> str:
-        """Convert audio to WAV format if needed"""
-        if audio_path.endswith('.wav'):
-            return audio_path
-        try:
-            sound = AudioSegment.from_file(audio_path)
-            wav_path = os.path.splitext(audio_path)[0] + ".wav"
-            sound.export(wav_path, format="wav")
-            return wav_path
-        except:
-            return audio_path  # Fallback to original if conversion fails
     def transcribe_audio(self, audio_path: str) -> str:
-        """Convert speech to text using purely open-source tools"""
-        audio_path = self._convert_audio_format(audio_path)
         try:
             with sr.AudioFile(audio_path) as source:
                 audio = self.recognizer.record(source)
-                return self.recognizer.recognize_vosk(audio)  # Offline recognition
-        except sr.UnknownValueError:
-            try:
-                # Fallback to Sphinx if Vosk fails
-                return self.recognizer.recognize_sphinx(audio)
-            except Exception as e:
-                return f"Transcription failed: {str(e)}"
-    def run(self, input_data: Union[str, dict]) -> str:
         """
-        Handle different input types:
-        - Text queries
-        - File paths
-        - Structured requests
         """
-        if isinstance(input_data, dict):
-            if 'query' in input_data:
-                return self.agent.run(input_data['query'])
-            elif 'file' in input_data:
-                content = self.process_document(input_data['file'])
-                return self.agent.run(f"Process this: {content}")
-        elif isinstance(input_data, str):
-            if input_data.endswith(('.pdf', '.docx', '.csv')):
-                content = self.process_document(input_data)
-                return self.agent.run(f"Process this document: {content}")
-            elif input_data.endswith(('.wav', '.mp3', '.ogg')):
-                content = self.transcribe_audio(input_data)
-                return self.agent.run(f"Process this transcript: {content}")
-            else:
-                return self.agent.run(input_data)
-        return "Unsupported input type"

 import gradio as gr
 import requests
+import json
+from typing import List, Dict, Union
 import speech_recognition as sr
 from pydub import AudioSegment
+import wikipediaapi
+import pandas as pd
+from PyPDF2 import PdfReader
+from docx import Document
 # (Keep Constants as is)
 # --- Constants ---
 # --- Basic Agent Definition ---
 class BasicAgent:
+    def __init__(self, ollama_base_url: str = "http://localhost:11434"):
+        """
+        Set up a pure-Python agent with:
+        - Local LLM via Ollama
+        - Web search (SearxNG)
+        - Wikipedia access
+        - Document processing
+        - Speech-to-text
+
+        Args:
+            ollama_base_url: Root URL of the Ollama server. A trailing slash
+                is tolerated.
+        """
+        # Strip trailing slashes so "http://host:11434/" does not turn into
+        # "http://host:11434//api/generate".
+        self.ollama_url = f"{ollama_base_url.rstrip('/')}/api/generate"
+        # NOTE(review): searx.space looks like the directory of public
+        # instances rather than a search endpoint itself — confirm and point
+        # this at a concrete instance that allows format=json.
+        self.searx_url = "https://searx.space/search"  # Public Searx instance
+        # NOTE(review): newer Wikipedia-API releases appear to expect a
+        # descriptive user agent as the first argument — confirm against the
+        # installed version before relying on this call.
+        self.wiki = wikipediaapi.Wikipedia('en')
+        self.recognizer = sr.Recognizer()
         print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
+        # NOTE(review): this diff view shows only the tail of the method.
+        # `fixed_answer` is not assigned in any visible line, so it is
+        # presumably set in the elided part of the body — confirm against
+        # the full app.py.
         print(f"Agent returning answer: {fixed_answer}")
         return fixed_answer
+    def call_llm(self, prompt: str, model: str = "llama3", timeout: float = 120.0) -> str:
+        """
+        Send a prompt to the local Ollama server and return the generated text.
+
+        Args:
+            prompt: Text sent as the "prompt" field of the request.
+            model: Name of the Ollama model to run.
+            timeout: Seconds to wait for the server before giving up.
+
+        Returns:
+            The "response" field of the JSON reply ("" when it is absent), or
+            a string starting with "LLM Error:" when the call fails.
+        """
+        payload = {
+            "model": model,
+            "prompt": prompt,
+            "stream": False,
+        }
+        try:
+            # requests waits forever unless a timeout is given, which would
+            # hang the caller if the Ollama server stops answering.
+            response = requests.post(self.ollama_url, json=payload, timeout=timeout)
+            response.raise_for_status()
+            data = response.json()
+        except (requests.RequestException, ValueError) as e:
+            # ValueError covers a body that is not valid JSON.
+            return f"LLM Error: {str(e)}"
+        # Only a JSON object can carry the "response" field.
+        return data.get("response", "") if isinstance(data, dict) else ""
+    def web_search(self, query: str, timeout: float = 10.0) -> List[Dict]:
+        """
+        Query the configured SearxNG endpoint and return its result entries.
+
+        Args:
+            query: Search terms sent as the "q" parameter.
+            timeout: Seconds to wait for the search service before giving up.
+
+        Returns:
+            The "results" list of the JSON reply, or an empty list when the
+            request fails or the reply has no usable "results" list.
+        """
+        params = {
+            "q": query,
+            "format": "json",
+            "engines": "google,bing,duckduckgo",
+        }
+        try:
+            # requests waits forever unless a timeout is given.
+            response = requests.get(self.searx_url, params=params, timeout=timeout)
+            response.raise_for_status()
+            data = response.json()
+        except (requests.RequestException, ValueError):
+            # ValueError covers a body that is not valid JSON (e.g. an HTML
+            # error page); search stays best-effort and yields no results.
+            return []
+        results = data.get("results", []) if isinstance(data, dict) else []
+        # Keep the declared return type even if the service answers oddly.
+        return results if isinstance(results, list) else []
+    def wikipedia_search(self, query: str) -> str:
+        """
+        Look up a Wikipedia page by title and return its summary.
+
+        Returns the fixed message "No Wikipedia page found" when the page
+        does not exist.
+        """
+        article = self.wiki.page(query)
+        if not article.exists():
+            return "No Wikipedia page found"
+        return article.summary
     def process_document(self, file_path: str) -> str:
+        """
+        Extract the text content of a PDF, Word, CSV or Excel file.
+
+        Returns:
+            - "File not found" when file_path does not exist.
+            - PDF: the extracted text of every page, joined by newlines.
+            - .doc/.docx: the text of every paragraph, joined by newlines.
+            - .csv/.xls/.xlsx: the string rendering of the loaded DataFrame.
+            - "Unsupported file format" for any other extension.
+            - "Error processing document: ..." when any step raises.
+        """
         if not os.path.exists(file_path):
             return "File not found"
+        # NOTE(review): `ext` is not assigned in any line visible in this
+        # diff view; it is presumably derived from file_path in an elided
+        # line — confirm against the full app.py.
         try:
             if ext == '.pdf':
+                with open(file_path, 'rb') as f:
+                    reader = PdfReader(f)
+                    return "\n".join([page.extract_text() for page in reader.pages])
             elif ext in ('.doc', '.docx'):
+                doc = Document(file_path)
+                return "\n".join([para.text for para in doc.paragraphs])
             elif ext == '.csv':
+                return pd.read_csv(file_path).to_string()
+            elif ext in ('.xls', '.xlsx'):
+                return pd.read_excel(file_path).to_string()
             else:
                 return "Unsupported file format"
+        # Broad catch is deliberate here: every failure is reported to the
+        # caller as a message string instead of propagating.
         except Exception as e:
             return f"Error processing document: {str(e)}"
     def transcribe_audio(self, audio_path: str) -> str:
+        """
+        Convert speech to text using Vosk (offline).
+
+        Non-WAV input is first converted to a temporary WAV file, which is
+        removed again before returning.
+
+        Args:
+            audio_path: Path of the audio file to transcribe.
+
+        Returns:
+            The recognizer output, or "Transcription failed: ..." when any
+            step raises.
+        """
+        # Local imports keep this method self-contained.
+        import os
+        import tempfile
+
+        temp_wav = None
         try:
+            # Convert to WAV if needed (extension check ignores case).
+            if not audio_path.lower().endswith('.wav'):
+                sound = AudioSegment.from_file(audio_path)
+                # A unique temp file avoids clobbering a shared "temp.wav"
+                # when several transcriptions run at once.
+                handle, temp_wav = tempfile.mkstemp(suffix=".wav")
+                os.close(handle)
+                sound.export(temp_wav, format="wav")
+                audio_path = temp_wav
             with sr.AudioFile(audio_path) as source:
                 audio = self.recognizer.record(source)
+            return self.recognizer.recognize_vosk(audio)
+        except Exception as e:
+            return f"Transcription failed: {str(e)}"
+        finally:
+            # Best-effort cleanup of the converted copy; a failed delete must
+            # not mask the transcription result.
+            if temp_wav is not None:
+                try:
+                    os.remove(temp_wav)
+                except OSError:
+                    pass
+    def process_request(self, request: Union[str, Dict]) -> str:
+        """
+        Route a request to the LLM, optionally gathering material first.
+
+        - Anything that is not a dict is forwarded to the LLM unchanged.
+        - A dict with a 'steps' entry runs each 'search' / 'process' step in
+          order and asks the LLM to process the JSON-encoded results.
+        - Any other dict yields "Unsupported request format".
+        """
+        # Simple text query: hand it straight to the model.
+        if not isinstance(request, dict):
+            return self.call_llm(request)
+        if 'steps' not in request:
+            return "Unsupported request format"
+        # Complex request: collect one result per recognised step; steps of
+        # any other type contribute nothing.
+        gathered = []
+        for step in request['steps']:
+            step_type = step['type']
+            if step_type == 'search':
+                outcome = self.web_search(step['query'])
+            elif step_type == 'process':
+                outcome = self.process_document(step['file'])
+            else:
+                continue
+            gathered.append(outcome)
+        prompt = f"Process these results: {json.dumps(gathered)}"
+        return self.call_llm(prompt)