import os
import tempfile
import time

import gradio as gr
import moviepy.editor
import pandas as pd
import requests
import whisper
import wikipedia
from duckduckgo_search import DDGS
from transformers import pipeline


class SmartAgent:
    """Question-answering agent backed by Wikipedia, DuckDuckGo and Whisper.

    Calling the agent with only a question looks up context on Wikipedia
    (falling back to DuckDuckGo) and runs an extractive question-answering
    pipeline over it. Calling it with a video path returns the Whisper
    transcription of the video's audio track instead.
    """

    def __init__(self):
        """Load the Whisper ``base`` model and the question-answering pipeline."""
        print("SmartAgent initialized.")
        self.whisper_model = whisper.load_model("base")
        self.qa_pipeline = pipeline("question-answering")

    def call_whisper(self, video_path: str) -> str:
        """Transcribe the audio track of the video at *video_path*.

        Args:
            video_path: Path to a video file readable by MoviePy.

        Returns:
            The transcribed text reported by Whisper.

        Raises:
            ValueError: If the video has no audio track.
        """
        video = moviepy.editor.VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError(f"Video has no audio track: {video_path}")
            # A private temporary directory keeps concurrent calls from
            # overwriting each other's audio and removes the file afterwards.
            with tempfile.TemporaryDirectory() as tmp_dir:
                audio_path = os.path.join(tmp_dir, "temp_audio.wav")
                video.audio.write_audiofile(audio_path)
                # Transcribe while the file still exists.
                result = self.whisper_model.transcribe(audio_path)
        finally:
            # Release the clip's readers even when extraction fails.
            video.close()
        return result["text"]

    def classify_question(self, question: str) -> str:
        """Classify *question* by simple, case-insensitive keyword matching.

        Returns:
            ``"count"`` if the question contains "how many" or "number of",
            ``"date"`` if it contains "when" or "what year", otherwise
            ``"open"``.
        """
        q = question.lower()
        if "how many" in q or "number of" in q:
            return "count"
        if "when" in q or "what year" in q:
            return "date"
        return "open"

    def wiki_search(self, question: str) -> str:
        """Return the content of the top Wikipedia search hit for *question*.

        The lookup is best-effort: any failure is reported on stdout and an
        empty string is returned so the caller can fall back to another source.
        """
        try:
            results = wikipedia.search(question, results=1)
            if not results:
                return ""
            # The title comes straight from the search results, so disable
            # auto-suggest, which could rewrite it into a different page.
            page = wikipedia.page(results[0], auto_suggest=False)
            return page.content
        except Exception as e:
            print(f"Wikipedia lookup failed: {e}")
            return ""

    def fallback_duckduckgo(self, question: str) -> str:
        """Return DuckDuckGo result snippets for *question* as one string.

        Up to three results are requested and their non-empty ``body``
        snippets are joined with newlines. The lookup is best-effort: any
        failure is reported on stdout and an empty string is returned.
        """
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(question, max_results=3))
            # Use every snippet that was fetched rather than only the first.
            return "\n".join(hit["body"] for hit in results if hit.get("body"))
        except Exception as e:
            print(f"DuckDuckGo lookup failed: {e}")
            return ""

    def answer_with_context(self, question: str, context: str) -> str:
        """Extract an answer to *question* from *context*.

        Returns:
            The pipeline's answer; ``"No reliable context found."`` when
            *context* is empty or whitespace-only; or a ``"QA error: ..."``
            message if the pipeline raises.
        """
        try:
            if not context.strip():
                return "No reliable context found."
            result = self.qa_pipeline(question=question, context=context)
            return result["answer"]
        except Exception as e:
            return f"QA error: {e}"

    def __call__(self, question: str, video_path: str = None) -> str:
        """Answer *question*, or transcribe *video_path* when one is given.

        Args:
            question: The question to answer.
            video_path: Optional path to a video. When truthy, the question is
                not answered and the video's transcription is returned.

        Returns:
            The transcription, or the answer extracted from the looked-up
            context.
        """
        print(f"Agent received question: {question[:60]}...")
        if video_path:
            return self.call_whisper(video_path)
        context = self.wiki_search(question)
        if not context:
            context = self.fallback_duckduckgo(question)
        return self.answer_with_context(question, context)