wt002 committed on
Commit
6b81dc2
·
verified ·
1 Parent(s): 836d49c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -106
app.py CHANGED
@@ -1,16 +1,15 @@
1
  import os
 
2
  import gradio as gr
3
  import requests
4
 
5
- import os
6
- import requests
7
- import json
8
  from typing import List, Dict, Union
9
- from pydub import AudioSegment
10
  import wikipediaapi
11
  import pandas as pd
12
- from PyPDF2 import PdfReader
13
- from docx import Document
14
 
15
  # (Keep Constants as is)
16
  # --- Constants ---
@@ -19,96 +18,34 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
 
20
  # --- Basic Agent Definition ---
21
  class BasicAgent:
22
- def __init__(self, ollama_base_url: str = "http://localhost:11434"):
23
- self.ollama_url = f"{ollama_base_url}/api/generate"
24
- self.searx_url = "https://searx.space/search"
 
 
 
 
 
25
  self.wiki = wikipediaapi.Wikipedia('en')
 
26
 
27
  print("BasicAgent initialized.")
28
 
29
  def __call__(self, question: str) -> str:
30
  print(f"Agent received question (first 50 chars): {question[:50]}...")
31
- fixed_answer = self.agent.run(question)
32
  print(f"Agent returning answer: {fixed_answer}")
33
  return fixed_answer
34
 
35
- # Initialize Vosk if available
36
- self.vosk_model = None
37
- try:
38
- from vosk import Model, KaldiRecognizer
39
- model_path = "vosk-model-small-en-us-0.15"
40
- if os.path.exists(model_path):
41
- self.vosk_model = Model(model_path)
42
- except ImportError:
43
- pass
44
-
45
- def transcribe_audio(self, audio_path: str) -> str:
46
- """Speech-to-text using Vosk or basic audio processing"""
47
- # Convert to WAV if needed
48
- if not audio_path.endswith('.wav'):
49
- try:
50
- sound = AudioSegment.from_file(audio_path)
51
- audio_path = "temp.wav"
52
- sound.export(audio_path, format="wav")
53
- except:
54
- return "Audio conversion failed"
55
-
56
- # Try Vosk first if available
57
- if self.vosk_model:
58
- try:
59
- from vosk import KaldiRecognizer
60
- import wave
61
- wf = wave.open(audio_path, "rb")
62
- rec = KaldiRecognizer(self.vosk_model, wf.getframerate())
63
-
64
- results = []
65
- while True:
66
- data = wf.readframes(4000)
67
- if len(data) == 0:
68
- break
69
- if rec.AcceptWaveform(data):
70
- results.append(json.loads(rec.Result()))
71
-
72
- final = json.loads(rec.FinalResult())
73
- if final['text']:
74
- results.append(final)
75
- return " ".join([r['text'] for r in results if 'text' in r])
76
- except Exception as e:
77
- return f"Vosk Error: {str(e)}"
78
-
79
- # Fallback: Return audio metadata
80
- try:
81
- sound = AudioSegment.from_file(audio_path)
82
- return f"Audio file: {sound.duration_seconds} seconds, {sound.channels} channels"
83
- except:
84
- return "Audio processing failed"
85
 
86
- def transcribe_audio(self, audio_path: str) -> str:
87
- """Speech-to-text using Vosk or basic audio processing"""
88
- # Convert to WAV if needed
89
- if not audio_path.endswith('.wav'):
90
- try:
91
- sound = AudioSegment.from_file(audio_path)
92
- audio_path = "temp.wav"
93
- sound.export(audio_path, format="wav")
94
- except:
95
- return "Audio conversion failed"
96
 
97
-
98
-
99
- def call_llm(self, prompt: str, model: str = "llama3") -> str:
100
- """Call local Ollama LLM"""
101
- payload = {
102
- "model": model,
103
- "prompt": prompt,
104
- "stream": False
105
- }
106
  try:
107
- response = requests.post(self.ollama_url, json=payload)
108
- response.raise_for_status()
109
- return response.json().get("response", "")
110
- except requests.RequestException as e:
111
- return f"LLM Error: {str(e)}"
112
 
113
  def web_search(self, query: str) -> List[Dict]:
114
  """Use SearxNG meta-search engine"""
@@ -138,12 +75,9 @@ class BasicAgent:
138
 
139
  try:
140
  if ext == '.pdf':
141
- with open(file_path, 'rb') as f:
142
- reader = PdfReader(f)
143
- return "\n".join([page.extract_text() for page in reader.pages])
144
  elif ext in ('.doc', '.docx'):
145
- doc = Document(file_path)
146
- return "\n".join([para.text for para in doc.paragraphs])
147
  elif ext == '.csv':
148
  return pd.read_csv(file_path).to_string()
149
  elif ext in ('.xls', '.xlsx'):
@@ -153,20 +87,34 @@ class BasicAgent:
153
  except Exception as e:
154
  return f"Error processing document: {str(e)}"
155
 
156
- def transcribe_audio(self, audio_path: str) -> str:
157
- """Convert speech to text using Vosk (offline)"""
158
  try:
159
- # Convert to WAV if needed
160
- if not audio_path.endswith('.wav'):
161
- sound = AudioSegment.from_file(audio_path)
162
- audio_path = "temp.wav"
163
- sound.export(audio_path, format="wav")
164
-
165
- with sr.AudioFile(audio_path) as source:
166
- audio = self.recognizer.record(source)
167
- return self.recognizer.recognize_vosk(audio)
168
- except Exception as e:
169
- return f"Transcription failed: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  def process_request(self, request: Union[str, Dict]) -> str:
172
  """
@@ -176,7 +124,6 @@ class BasicAgent:
176
  - Complex multi-step requests
177
  """
178
  if isinstance(request, dict):
179
- # Complex request handling
180
  if 'steps' in request:
181
  results = []
182
  for step in request['steps']:
@@ -184,11 +131,11 @@ class BasicAgent:
184
  results.append(self.web_search(step['query']))
185
  elif step['type'] == 'process':
186
  results.append(self.process_document(step['file']))
187
- return self.call_llm(f"Process these results: {json.dumps(results)}")
188
  return "Unsupported request format"
189
 
190
- # Simple text query
191
- return self.call_llm(request)
192
 
193
 
194
 
 
1
  import os
2
+ from dotenv import load_dotenv
3
  import gradio as gr
4
  import requests
5
 
6
+ import google.generativeai as genai
 
 
7
  from typing import List, Dict, Union
8
+ import requests
9
  import wikipediaapi
10
  import pandas as pd
11
+
12
+ load_dotenv()
13
 
14
  # (Keep Constants as is)
15
  # --- Constants ---
 
18
 
19
  # --- Basic Agent Definition ---
20
  class BasicAgent:
21
+ def __init__(self, model_name: str = "gemini-pro"):
22
+ """
23
+ Multi-modal agent powered by Google Gemini with:
24
+ - Web search
25
+ - Wikipedia access
26
+ - Document processing
27
+ """
28
+ self.model = genai.GenerativeModel(model_name)
29
  self.wiki = wikipediaapi.Wikipedia('en')
30
+ self.searx_url = "https://searx.space/search" # Public Searx instance
31
 
32
  print("BasicAgent initialized.")
33
 
34
  def __call__(self, question: str) -> str:
35
  print(f"Agent received question (first 50 chars): {question[:50]}...")
36
+ fixed_answer = self.agent.process_request(question)
37
  print(f"Agent returning answer: {fixed_answer}")
38
  return fixed_answer
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
 
 
 
 
 
 
 
 
 
 
41
 
42
+ def generate_response(self, prompt: str) -> str:
43
+ """Get response from Gemini"""
 
 
 
 
 
 
 
44
  try:
45
+ response = self.model.generate_content(prompt)
46
+ return response.text
47
+ except Exception as e:
48
+ return f"Error generating response: {str(e)}"
 
49
 
50
  def web_search(self, query: str) -> List[Dict]:
51
  """Use SearxNG meta-search engine"""
 
75
 
76
  try:
77
  if ext == '.pdf':
78
+ return self._process_pdf(file_path)
 
 
79
  elif ext in ('.doc', '.docx'):
80
+ return self._process_word(file_path)
 
81
  elif ext == '.csv':
82
  return pd.read_csv(file_path).to_string()
83
  elif ext in ('.xls', '.xlsx'):
 
87
  except Exception as e:
88
  return f"Error processing document: {str(e)}"
89
 
90
+ def _process_pdf(self, file_path: str) -> str:
91
+ """Process PDF using Gemini's vision capability"""
92
  try:
93
+ # For Gemini 1.5 or later which supports file uploads
94
+ with open(file_path, "rb") as f:
95
+ file = genai.upload_file(f)
96
+ response = self.model.generate_content(
97
+ ["Extract and summarize the key points from this document:", file]
98
+ )
99
+ return response.text
100
+ except:
101
+ # Fallback for older Gemini versions
102
+ try:
103
+ import PyPDF2
104
+ with open(file_path, 'rb') as f:
105
+ reader = PyPDF2.PdfReader(f)
106
+ return "\n".join([page.extract_text() for page in reader.pages])
107
+ except ImportError:
108
+ return "PDF processing requires PyPDF2 (pip install PyPDF2)"
109
+
110
+ def _process_word(self, file_path: str) -> str:
111
+ """Process Word documents"""
112
+ try:
113
+ from docx import Document
114
+ doc = Document(file_path)
115
+ return "\n".join([para.text for para in doc.paragraphs])
116
+ except ImportError:
117
+ return "Word processing requires python-docx (pip install python-docx)"
118
 
119
  def process_request(self, request: Union[str, Dict]) -> str:
120
  """
 
124
  - Complex multi-step requests
125
  """
126
  if isinstance(request, dict):
 
127
  if 'steps' in request:
128
  results = []
129
  for step in request['steps']:
 
131
  results.append(self.web_search(step['query']))
132
  elif step['type'] == 'process':
133
  results.append(self.process_document(step['file']))
134
+ return self.generate_response(f"Process these results: {results}")
135
  return "Unsupported request format"
136
 
137
+ return self.generate_response(request)
138
+
139
 
140
 
141