# NOTE(review): the lines below are Hugging Face file-viewer residue captured
# when this file was scraped; kept as comments so the module still parses.
# sg7849's picture
# app, finetuned_clip.pt, retrieval and requirement files
# 6d30351 verified
# raw / history / blame / contribute / delete
# 1.92 kB
import os

import gradio as gr
import requests

from hf_retrieval import *
def ask_llama_about_chunks(question):
    """Return the subtitle and video clip of the chunk that best answers *question*.

    Retrieves candidate subtitle chunks, asks a hosted Llama model to pick the
    most relevant one by its number, and falls back to the first chunk when the
    model's reply is not a usable index.

    Args:
        question: Free-text user question about the video library.

    Returns:
        Tuple of (subtitle text of the best chunk, ``gr.Video`` component for
        the matching clip).
    """
    top_chunks = retrieval(question)
    merged_chunks = merge_chunks(top_chunks)

    # Label each chunk [1]..[k] so the model can answer with a bare number.
    context_text = "\n\n".join(
        f"[{i + 1}] [{chunk['video_id']}] [{chunk['start_time']}] - [{chunk['end_time']}]: {chunk['subtitle']}"
        for i, chunk in enumerate(merged_chunks)
    )

    # Advertise the actual chunk count instead of a hard-coded "10".
    prompt = f"""You are an assistant helping users understand video content.
Here are relevant pieces of text from a video, each labeled [1] through [{len(merged_chunks)}].
QUESTION: "{question}"
Based only on the context, which segment of text best answers the users question or comment. Prioritize longer responses. Return ONLY the number of the most relevant chunk (e.g., '1').
Do NOT return any explanation.
CONTEXT:
{context_text}
ANSWER:"""

    response = requests.post(
        "https://api-inference.huggingface.co/models/meta-llama/Llama-3-8b-chat-hf",
        headers={"Authorization": f"Bearer {os.environ['HF_API_TOKEN']}"},
        json={"inputs": prompt},
        timeout=60,  # don't hang the UI indefinitely on a slow endpoint
    )
    data = response.json()
    # The HF Inference API returns a LIST of generations on success; the
    # previous code assumed a dict and raised AttributeError on every call.
    if isinstance(data, list) and data:
        data = data[0]
    answer = data.get("generated_text", "").strip() if isinstance(data, dict) else ""

    # Fall back to the first chunk on a non-numeric or out-of-range reply.
    # (Assumes merged_chunks is non-empty — TODO confirm retrieval guarantees this.)
    try:
        best_index = int(answer) - 1
        if not 0 <= best_index < len(merged_chunks):
            raise IndexError(best_index)  # reject negative/overflowing indices
        best_chunk = merged_chunks[best_index]
    except (ValueError, IndexError):
        best_chunk = merged_chunks[0]

    video_clip_path = get_video_segment(
        best_chunk['video_id'], best_chunk["start_time"], best_chunk["end_time"]
    )
    return best_chunk['subtitle'], gr.Video(video_clip_path)
# Gradio UI wiring: one question textbox in, transcript text + video clip out.
iface = gr.Interface(
    fn=ask_llama_about_chunks,
    inputs=[gr.Textbox(label="Add your question here:")],
    outputs=[
        gr.Textbox(label="Best Matching Transcript"),
        gr.Video(label="Relevant Clip"),
    ],
    title="Chat With Your Video Library",
    description="Ask questions about machine learning and the most relevant video segment and captions will be returned.",
)

if __name__ == "__main__":
    # Launch the app only when run as a script, not when imported.
    iface.launch()