import os

import gradio as gr
import requests

from hf_retrieval import retrieval, merge_chunks, get_video_segment


def ask_llama_about_chunks(question):
    # Retrieve the top-matching transcript chunks and merge adjacent ones.
    top_chunks = retrieval(question)
    merged_chunks = merge_chunks(top_chunks)

    # Number each chunk so the model can answer with a single index.
    context_text = "\n\n".join(
        f"[{i + 1}] [{chunk['video_id']}] [{chunk['start_time']}] - [{chunk['end_time']}]: {chunk['subtitle']}"
        for i, chunk in enumerate(merged_chunks)
    )

    prompt = f"""You are an assistant helping users understand video content.

Here are relevant pieces of text from a video, each labeled [1] through [{len(merged_chunks)}].

QUESTION: "{question}"

Based only on the context, decide which segment of text best answers the user's question or comment. Prioritize longer responses.
Return ONLY the number of the most relevant chunk (e.g., '1'). Do NOT return any explanation.

CONTEXT:
{context_text}

ANSWER:"""

    response = requests.post(
        # Meta-Llama-3-8B-Instruct is the model's Hugging Face Hub id
        # (the original "Llama-3-8b-chat-hf" name is not a Hub repo).
        "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct",
        headers={"Authorization": f"Bearer {os.environ['HF_API_TOKEN']}"},
        # return_full_text=False keeps the prompt out of the generated answer.
        json={"inputs": prompt, "parameters": {"return_full_text": False}},
    )
    data = response.json()

    # The Inference API returns a list of generations for text-generation
    # models; fall back to a dict lookup for error payloads.
    if isinstance(data, list) and data:
        answer = data[0].get("generated_text", "").strip()
    else:
        answer = data.get("generated_text", "").strip()

    # Use the top retrieval hit if the model's reply isn't a valid index.
    try:
        best_chunk_index = int(answer) - 1
        if not 0 <= best_chunk_index < len(merged_chunks):
            raise IndexError
        best_chunk = merged_chunks[best_chunk_index]
    except (ValueError, IndexError):
        best_chunk = merged_chunks[0]

    video_clip_path = get_video_segment(
        best_chunk["video_id"], best_chunk["start_time"], best_chunk["end_time"]
    )
    return best_chunk["subtitle"], gr.Video(video_clip_path)


iface = gr.Interface(
    fn=ask_llama_about_chunks,
    inputs=[gr.Textbox(label="Add your question here:")],
    outputs=[
        gr.Textbox(label="Best Matching Transcript"),
        gr.Video(label="Relevant Clip"),
    ],
    title="Chat With Your Video Library",
    description="Ask questions about machine learning and the most relevant video segment and captions will be returned.",
)

if __name__ == "__main__":
    iface.launch()