import os

import gradio as gr
import requests

from hf_retrieval import *  # provides retrieval, merge_chunks, get_video_segment
def ask_llama_about_chunks(question):
    # Retrieve the most relevant transcript chunks and merge adjacent ones.
    top_chunks = retrieval(question)
    merged_chunks = merge_chunks(top_chunks)

    # Number each chunk so the model can answer with a single index.
    context_text = "\n\n".join(
        f"[{i + 1}] [{chunk['video_id']}] [{chunk['start_time']}] - [{chunk['end_time']}]: {chunk['subtitle']}"
        for i, chunk in enumerate(merged_chunks)
    )

    prompt = f"""You are an assistant helping users understand video content.

Here are relevant pieces of text from a video, each labeled [1] through [{len(merged_chunks)}].

QUESTION: "{question}"

Based only on the context, which segment of text best answers the user's question or comment? Prioritize longer responses. Return ONLY the number of the most relevant chunk (e.g., '1').
Do NOT return any explanation.

CONTEXT:
{context_text}

ANSWER:"""
    # Ask the hosted model to pick the best chunk by its number.
    response = requests.post(
        "https://api-inference.huggingface.co/models/meta-llama/Llama-3-8b-chat-hf",
        headers={"Authorization": f"Bearer {os.environ['HF_API_TOKEN']}"},
        # return_full_text=False asks the API for only the completion, not the echoed prompt.
        json={"inputs": prompt, "parameters": {"return_full_text": False}},
    )
    data = response.json()

    # The Inference API returns a list of generations such as
    # [{"generated_text": "..."}]; guard against unexpected shapes.
    if isinstance(data, list) and data:
        answer = data[0].get("generated_text", "").strip()
    else:
        answer = ""

    try:
        best_chunk_index = int(answer) - 1
        best_chunk = merged_chunks[best_chunk_index]
    except (ValueError, IndexError):
        # Fall back to the top-ranked chunk if the model's reply is not a usable index.
        best_chunk = merged_chunks[0]

    video_clip_path = get_video_segment(best_chunk["video_id"], best_chunk["start_time"], best_chunk["end_time"])
    return best_chunk["subtitle"], gr.Video(video_clip_path)
iface = gr.Interface(
    fn=ask_llama_about_chunks,
    inputs=[
        gr.Textbox(label="Add your question here:")
    ],
    outputs=[
        gr.Textbox(label="Best Matching Transcript"),
        gr.Video(label="Relevant Clip")
    ],
    title="Chat With Your Video Library",
    description="Ask questions about machine learning and the most relevant video segment and captions will be returned.",
)

if __name__ == "__main__":
    iface.launch()
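The retrieval helpers come from the star import of hf_retrieval. Their implementation is not shown here; the following stubs are only a hypothetical sketch of the signatures and chunk shape the app code above assumes.

# Hypothetical sketch of the hf_retrieval helpers this app relies on.
# Names and keys are taken from how they are used above; the real module may differ.
def retrieval(question: str) -> list[dict]:
    """Return the top-scoring transcript chunks for the question.

    Each chunk is assumed to be a dict with the keys used above:
    'video_id', 'start_time', 'end_time', and 'subtitle'.
    """
    ...

def merge_chunks(chunks: list[dict]) -> list[dict]:
    """Merge adjacent chunks from the same video into longer segments."""
    ...

def get_video_segment(video_id: str, start_time, end_time) -> str:
    """Cut the [start_time, end_time] span from the video and return the clip's file path."""
    ...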