Toumaima's picture
Update app.py
9f6cf12 verified
raw
history blame
5.42 kB
import os
import re
import tempfile

import moviepy.editor
import numpy as np
import spacy
import whisper
from duckduckgo_search import DDGS
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
class BasicAgent:
    """Answer a question from a web search result, or transcribe a video.

    Calling the agent with only a question runs a DuckDuckGo text search,
    feeds the first result's body to an extractive question-answering
    pipeline and decorates the answer depending on the question wording.
    Calling it with ``video_path`` returns the Whisper transcription of that
    video instead.
    """

    def __init__(self):
        """Load every model the agent uses (Whisper, HF pipelines, spaCy)."""
        print("BasicAgent initialized.")
        self.whisper_model = whisper.load_model("base")
        # No model name is passed, so each pipeline uses the library default
        # for its task.
        self.qa_pipeline = pipeline("question-answering")
        self.ner_pipeline = pipeline("ner", aggregation_strategy="simple")
        self.embedding_model = pipeline("feature-extraction")
        self.spacy = spacy.load("en_core_web_sm")

    def extract_named_entities(self, text):
        """Return the words of all NER entities whose group is ``"PER"``."""
        entities = self.ner_pipeline(text)
        return [e["word"] for e in entities if e["entity_group"] == "PER"]

    def extract_numbers(self, text):
        """Return every run of decimal digits in ``text`` as a list of strings."""
        return re.findall(r"\d+", text)

    def extract_keywords(self, text):
        """Return the text of every token spaCy tags as NOUN or PROPN."""
        doc = self.spacy(text)
        return [token.text for token in doc if token.pos_ in ("NOUN", "PROPN")]

    def call_whisper(self, video_path: str) -> str:
        """Transcribe the audio track of the video at ``video_path``.

        The audio is written to a uniquely named temporary WAV file, so
        concurrent calls do not overwrite each other. The clip is closed and
        the temporary file is removed even when extraction or transcription
        fails.

        Args:
            video_path: Path of the video file to transcribe.

        Returns:
            The ``"text"`` field of the Whisper transcription result.
        """
        # mkstemp returns an open descriptor; close it right away so the
        # audio writer can (re)open the path itself.
        fd, audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            video = moviepy.editor.VideoFileClip(video_path)
            try:
                # NOTE(review): presumably `video.audio` is None for clips
                # without an audio track, which would raise AttributeError
                # here exactly as before - confirm against moviepy.
                video.audio.write_audiofile(audio_path)
            finally:
                # Release the reader resources held by the clip.
                video.close()
            result = self.whisper_model.transcribe(audio_path)
        finally:
            if os.path.exists(audio_path):
                os.remove(audio_path)
        return result["text"]

    def search(self, question: str) -> str:
        """Return the body of the first DuckDuckGo text result for ``question``.

        This is best-effort: it never raises. When nothing is found or the
        search fails, a human-readable message is returned in place of the
        context.
        """
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(question, max_results=3))
            if not results:
                return "No relevant search results found."
            return results[0]["body"]
        except Exception as e:
            return f"Search error: {e}"

    def answer_question(self, question: str, context: str) -> str:
        """Extract an answer to ``question`` from ``context``.

        Falls back to returning ``context`` unchanged when the QA pipeline
        raises. Only ``Exception`` subclasses are caught, so
        ``KeyboardInterrupt`` and ``SystemExit`` still propagate.
        """
        try:
            return self.qa_pipeline(question=question, context=context)["answer"]
        except Exception:
            return context  # Fallback to context if QA fails

    def __call__(self, question: str, video_path: str = None) -> str:
        """Run the agent.

        Args:
            question: The question to answer.
            video_path: Optional path to a video. When given, the question is
                only logged and the video transcription is returned.

        Returns:
            The transcription, or the answer decorated according to the
            question wording.
        """
        print(f"Agent received question: {question[:60]}...")

        if video_path:
            transcription = self.call_whisper(video_path)
            print(f"Transcribed video: {transcription[:100]}...")
            return transcription

        context = self.search(question)
        answer = self.answer_question(question, context)
        q_lower = question.lower()

        # Enhance based on question type.
        # NOTE(review): these are substring checks, so e.g. "show" matches
        # "how" and "whole" matches "who". Kept as is to preserve routing.
        if "who" in q_lower:
            people = self.extract_named_entities(context)
            return f"👤 Who: {', '.join(people) if people else 'No person found'}\n\n🧠 Answer: {answer}"
        elif "how many" in q_lower:
            numbers = self.extract_numbers(context)
            return f"🔢 How many: {', '.join(numbers) if numbers else 'No numbers found'}\n\n🧠 Answer: {answer}"
        elif "how" in q_lower:
            return f"⚙️ How: {answer}"
        elif "what" in q_lower or "where" in q_lower:
            keywords = self.extract_keywords(context)
            return f"🗝️ Keywords: {', '.join(keywords[:5])}\n\n🧠 Answer: {answer}"
        else:
            return f"🧠 Answer: {answer}"
# --- Gradio UI, declared with the Blocks API ---
# NOTE(review): `gr` and `run_and_submit_all` are not defined in the visible
# part of this file; they must be provided elsewhere for this to run.
with gr.Blocks() as demo:
    # Page title, then the usage instructions rendered as Markdown.
    gr.Markdown(value="# Basic Agent Evaluation Runner")
    gr.Markdown(
        value="""
**Instructions:**
1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
---
**Disclaimers:**
Once clicking on the "submit button, it can take quite some time (this is the time for the agent to go through all the questions).
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution.
"""
    )

    gr.LoginButton()

    run_button = gr.Button(value="Run Evaluation & Submit All Answers")

    # Read-only widgets that receive the two values returned by the handler.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # Clicking the button runs the evaluation and fills both outputs.
    run_button.click(
        run_and_submit_all,
        outputs=[status_output, results_table],
    )
if __name__ == "__main__":
    # Startup banner.
    print(f"\n{'-' * 30} App Starting {'-' * 30}")

    # Both variables are read from the environment; absence is reported as a
    # hint that the app is probably running locally.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if not space_host_startup:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    if not space_id_startup:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    else:
        # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")

    # Closing rule as wide as the banner, followed by a blank line.
    print("-" * (60 + len(" App Starting ")), end="\n\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)