wt002 committed on
Commit
391c163
·
verified ·
1 Parent(s): f008d28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -128
app.py CHANGED
@@ -2,21 +2,14 @@ import os
2
  import gradio as gr
3
  import requests
4
 
5
- from typing import Union
6
- import os
7
- #from langchain.agents.agent import Agent
8
- #from langchain.agents.tools import Tool
9
- #from langchain.agents import AgentExecutor, initialize_agent
10
- from langchain_community.llms import Ollama
11
- from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
12
- from langchain_community.document_loaders import (
13
- CSVLoader,
14
- PyPDFLoader,
15
- UnstructuredWordDocumentLoader
16
- )
17
- from langchain_community.utilities import TextRequestsWrapper
18
  import speech_recognition as sr
19
  from pydub import AudioSegment
 
 
 
 
20
 
21
  # (Keep Constants as is)
22
  # --- Constants ---
@@ -25,7 +18,20 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
25
 
26
  # --- Basic Agent Definition ---
27
  class BasicAgent:
28
- def __init__(self):
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  print("BasicAgent initialized.")
30
 
31
  def __call__(self, question: str) -> str:
@@ -34,71 +40,44 @@ class BasicAgent:
34
  print(f"Agent returning answer: {fixed_answer}")
35
  return fixed_answer
36
 
37
- def __init__(self, model_name: str = "llama3"):
38
- """
39
- Open-source multi-modal agent with:
40
- - Web search
41
- - Document processing
42
- - Speech-to-text
43
- - URL content fetching
44
- """
45
- # Initialize LLM (local via Ollama)
46
- self.llm = Ollama(model=model_name, temperature=0.7)
47
-
48
- # Initialize tools
49
- self.search_tool = DuckDuckGoSearchRun()
50
- self.wikipedia_tool = WikipediaQueryRun()
51
- self.requests_tool = TextRequestsWrapper()
52
-
53
- # Speech recognition
54
- self.recognizer = sr.Recognizer()
55
 
56
- # Initialize agent
57
- self.tools = self._initialize_tools()
58
- self.agent = self._create_agent()
59
-
60
- def _initialize_tools(self) -> list[Tool]:
61
- """Initialize all available tools"""
62
- return [
63
- Tool(
64
- name="Web Search",
65
- func=self.search_tool.run,
66
- description="For current events/unknown topics"
67
- ),
68
- Tool(
69
- name="Wikipedia",
70
- func=self.wikipedia_tool.run,
71
- description="For factual information"
72
- ),
73
- Tool(
74
- name="Document Loader",
75
- func=self.process_document,
76
- description="Processes PDF, Word, CSV files"
77
- ),
78
- Tool(
79
- name="Speech Transcription",
80
- func=self.transcribe_audio,
81
- description="Converts speech from audio files to text"
82
- ),
83
- Tool(
84
- name="Website Content",
85
- func=self.requests_tool.get,
86
- description="Fetches content from URLs"
87
- )
88
- ]
89
-
90
- def _create_agent(self) -> AgentExecutor:
91
- """Create the agent executor"""
92
- return initialize_agent(
93
- tools=self.tools,
94
- llm=self.llm,
95
- agent="structured-chat-react",
96
- verbose=True,
97
- handle_parsing_errors=True
98
- )
99
 
100
  def process_document(self, file_path: str) -> str:
101
- """Handle different document types"""
102
  if not os.path.exists(file_path):
103
  return "File not found"
104
 
@@ -106,71 +85,57 @@ class BasicAgent:
106
 
107
  try:
108
  if ext == '.pdf':
109
- loader = PyPDFLoader(file_path)
 
 
110
  elif ext in ('.doc', '.docx'):
111
- loader = UnstructuredWordDocumentLoader(file_path)
 
112
  elif ext == '.csv':
113
- loader = CSVLoader(file_path)
 
 
114
  else:
115
  return "Unsupported file format"
116
-
117
- docs = loader.load()
118
- return "\n".join([doc.page_content for doc in docs])
119
-
120
  except Exception as e:
121
  return f"Error processing document: {str(e)}"
122
 
123
- def _convert_audio_format(self, audio_path: str) -> str:
124
- """Convert audio to WAV format if needed"""
125
- if audio_path.endswith('.wav'):
126
- return audio_path
127
-
128
- try:
129
- sound = AudioSegment.from_file(audio_path)
130
- wav_path = os.path.splitext(audio_path)[0] + ".wav"
131
- sound.export(wav_path, format="wav")
132
- return wav_path
133
- except:
134
- return audio_path # Fallback to original if conversion fails
135
-
136
  def transcribe_audio(self, audio_path: str) -> str:
137
- """Convert speech to text using purely open-source tools"""
138
- audio_path = self._convert_audio_format(audio_path)
139
-
140
  try:
 
 
 
 
 
 
141
  with sr.AudioFile(audio_path) as source:
142
  audio = self.recognizer.record(source)
143
- return self.recognizer.recognize_vosk(audio) # Offline recognition
144
- except sr.UnknownValueError:
145
- try:
146
- # Fallback to Sphinx if Vosk fails
147
- return self.recognizer.recognize_sphinx(audio)
148
- except Exception as e:
149
- return f"Transcription failed: {str(e)}"
150
-
151
- def run(self, input_data: Union[str, dict]) -> str:
152
  """
153
- Handle different input types:
154
- - Text queries
155
- - File paths
156
- - Structured requests
157
  """
158
- if isinstance(input_data, dict):
159
- if 'query' in input_data:
160
- return self.agent.run(input_data['query'])
161
- elif 'file' in input_data:
162
- content = self.process_document(input_data['file'])
163
- return self.agent.run(f"Process this: {content}")
164
- elif isinstance(input_data, str):
165
- if input_data.endswith(('.pdf', '.docx', '.csv')):
166
- content = self.process_document(input_data)
167
- return self.agent.run(f"Process this document: {content}")
168
- elif input_data.endswith(('.wav', '.mp3', '.ogg')):
169
- content = self.transcribe_audio(input_data)
170
- return self.agent.run(f"Process this transcript: {content}")
171
- else:
172
- return self.agent.run(input_data)
173
- return "Unsupported input type"
174
 
175
 
176
 
 
2
  import gradio as gr
3
  import requests
4
 
5
+ import json
6
+ from typing import List, Dict, Union
 
 
 
 
 
 
 
 
 
 
 
7
  import speech_recognition as sr
8
  from pydub import AudioSegment
9
+ import wikipediaapi
10
+ import pandas as pd
11
+ from PyPDF2 import PdfReader
12
+ from docx import Document
13
 
14
  # (Keep Constants as is)
15
  # --- Constants ---
 
18
 
19
  # --- Basic Agent Definition ---
20
  class BasicAgent:
21
def __init__(self, ollama_base_url: str = "http://localhost:11434"):
    """
    Pure Python agent with:
    - Local LLM via Ollama
    - Web search (SearxNG)
    - Wikipedia access
    - Document processing
    - Speech-to-text

    Args:
        ollama_base_url: Base URL of the Ollama server. A trailing slash
            is tolerated and removed before the endpoint path is appended.
    """
    # Strip trailing slashes so "http://host:11434/" does not become
    # "http://host:11434//api/generate".
    self.ollama_url = f"{ollama_base_url.rstrip('/')}/api/generate"
    # NOTE(review): searx.space looks like a directory of instances rather
    # than a search endpoint -- confirm this URL answers /search with JSON.
    self.searx_url = "https://searx.space/search"  # Public Searx instance
    # NOTE(review): recent Wikipedia-API releases appear to require a
    # user_agent argument -- confirm the installed version accepts this call.
    self.wiki = wikipediaapi.Wikipedia('en')
    self.recognizer = sr.Recognizer()

    print("BasicAgent initialized.")
36
 
37
  def __call__(self, question: str) -> str:
 
40
  print(f"Agent returning answer: {fixed_answer}")
41
  return fixed_answer
42
 
43
+
44
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
def call_llm(self, prompt: str, model: str = "llama3", *, timeout: float = 120.0) -> str:
    """Call the local Ollama LLM and return its text response.

    Args:
        prompt: Text sent to the model.
        model: Name of the Ollama model to query.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled server.

    Returns:
        The ``"response"`` field of the JSON reply (empty string if the
        field is absent), or a string starting with ``"LLM Error: "`` when
        the request fails or the reply is not valid JSON.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
    }
    try:
        response = requests.post(self.ollama_url, json=payload, timeout=timeout)
        response.raise_for_status()
        return response.json().get("response", "")
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error does not derive from RequestException.
        return f"LLM Error: {str(e)}"
59
+
60
def web_search(self, query: str, *, timeout: float = 10.0) -> List[Dict]:
    """Query the configured SearxNG meta-search endpoint.

    Args:
        query: Search terms.
        timeout: Seconds to wait for the search server before giving up,
            so a stalled instance cannot hang the agent.

    Returns:
        The ``"results"`` list from the JSON reply, or an empty list when
        the request fails, the reply is not valid JSON, or the field is
        missing.
    """
    params = {
        "q": query,
        "format": "json",
        "engines": "google,bing,duckduckgo",
    }
    try:
        response = requests.get(self.searx_url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json().get("results", [])
    except (requests.RequestException, ValueError):
        # ValueError covers an HTML/non-JSON reply (e.g. an instance that
        # has the JSON output format disabled) on older requests versions.
        return []
73
+
74
def wikipedia_search(self, query: str) -> str:
    """Return the Wikipedia summary for a page title.

    Args:
        query: Page title to look up; surrounding whitespace is ignored.

    Returns:
        The page summary, or ``"No Wikipedia page found"`` when the title
        is blank or the page does not exist.
    """
    title = query.strip() if query else ""
    if not title:
        # A blank title can never match a page; skip the lookup entirely.
        return "No Wikipedia page found"
    page = self.wiki.page(title)
    return page.summary if page.exists() else "No Wikipedia page found"
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  def process_document(self, file_path: str) -> str:
80
+ """Handle PDF, Word, CSV, Excel files"""
81
  if not os.path.exists(file_path):
82
  return "File not found"
83
 
 
85
 
86
  try:
87
  if ext == '.pdf':
88
+ with open(file_path, 'rb') as f:
89
+ reader = PdfReader(f)
90
+ return "\n".join([page.extract_text() for page in reader.pages])
91
  elif ext in ('.doc', '.docx'):
92
+ doc = Document(file_path)
93
+ return "\n".join([para.text for para in doc.paragraphs])
94
  elif ext == '.csv':
95
+ return pd.read_csv(file_path).to_string()
96
+ elif ext in ('.xls', '.xlsx'):
97
+ return pd.read_excel(file_path).to_string()
98
  else:
99
  return "Unsupported file format"
 
 
 
 
100
  except Exception as e:
101
  return f"Error processing document: {str(e)}"
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  def transcribe_audio(self, audio_path: str) -> str:
104
+ """Convert speech to text using Vosk (offline)"""
 
 
105
  try:
106
+ # Convert to WAV if needed
107
+ if not audio_path.endswith('.wav'):
108
+ sound = AudioSegment.from_file(audio_path)
109
+ audio_path = "temp.wav"
110
+ sound.export(audio_path, format="wav")
111
+
112
  with sr.AudioFile(audio_path) as source:
113
  audio = self.recognizer.record(source)
114
+ return self.recognizer.recognize_vosk(audio)
115
+ except Exception as e:
116
+ return f"Transcription failed: {str(e)}"
117
+
118
def process_request(self, request: Union[str, Dict]) -> str:
    """
    Handle different request types:
    - Direct text queries
    - File processing requests
    - Complex multi-step requests

    Args:
        request: Either a plain text prompt, or a dict with a ``'steps'``
            list. Each step is a dict with ``'type'`` set to ``'search'``
            (needs ``'query'``) or ``'process'`` (needs ``'file'``).

    Returns:
        The LLM's answer, or ``"Unsupported request format"`` when the
        request is neither a string nor a dict carrying a list of steps.
    """
    if isinstance(request, dict):
        # Complex request handling
        steps = request.get('steps')
        if not isinstance(steps, (list, tuple)):
            return "Unsupported request format"

        results = []
        for step in steps:
            kind = step.get('type') if isinstance(step, dict) else None
            if kind == 'search' and 'query' in step:
                results.append(self.web_search(step['query']))
            elif kind == 'process' and 'file' in step:
                results.append(self.process_document(step['file']))
            else:
                # Unknown or incomplete steps are skipped rather than
                # aborting the whole request with a KeyError/TypeError.
                print(f"Skipping unsupported or malformed step: {step!r}")
        return self.call_llm(f"Process these results: {json.dumps(results)}")

    if not isinstance(request, str):
        return "Unsupported request format"

    # Simple text query
    return self.call_llm(request)
 
 
139
 
140
 
141