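"""CineMaster AI: a Streamlit movie question-answering app.

Movie contexts are embedded with SentenceTransformers, indexed with FAISS for
nearest-neighbour retrieval, and the retrieved passages are passed to a
Groq-hosted Llama 3 70B model that writes the final answer.

Run locally (with the GROQ_API_KEY environment variable set) via:
    streamlit run app.py
"""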
import streamlit as st
import pandas as pd
import faiss
import numpy as np
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from groq import Groq
import os
# --------------------------
# Configuration & Styling
# --------------------------
st.set_page_config(
    page_title="CineMaster AI - Movie Expert",
    page_icon="🎬",
    layout="wide",
    initial_sidebar_state="expanded"
)
st.markdown("""
<style>
:root {
--primary: #7017ff;
--secondary: #ff2d55;
}
.header {
background: linear-gradient(135deg, var(--primary), var(--secondary));
color: white;
padding: 2rem;
border-radius: 15px;
text-align: center;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
margin-bottom: 2rem;
}
.response-box {
background: rgba(255,255,255,0.1);
border-radius: 10px;
padding: 1.5rem;
margin: 1rem 0;
border: 1px solid rgba(255,255,255,0.2);
}
.stButton>button {
background: linear-gradient(45deg, var(--primary), var(--secondary)) !important;
color: white !important;
border-radius: 25px;
padding: 0.8rem 2rem;
font-weight: 600;
transition: transform 0.2s;
}
.stButton>button:hover {
transform: scale(1.05);
}
.movie-card {
background: rgba(0,0,0,0.2);
border-radius: 10px;
padding: 1rem;
margin: 0.5rem 0;
}
</style>
""", unsafe_allow_html=True)
# --------------------------
# Data Loading & Processing
# --------------------------
@st.cache_resource
def load_movie_data():
    """Load the facebook/wiki_movies QA dataset, falling back to a small built-in sample."""
    # Option 1: Try loading the dataset from the Hugging Face Hub
    try:
        dataset = load_dataset(
            "facebook/wiki_movies",
            split="train",
            trust_remote_code=True  # the dataset relies on a loading script
        )
        df = pd.DataFrame(dataset)
        df['context'] = "Question: " + df['question'].str.strip() + "\n" + \
                        "Answer: " + df['answer'].str.strip()
        return df
    except Exception:
        # Option 2: Fall back to a small synthetic sample so the app still works offline
        st.warning("Could not load facebook/wiki_movies; using built-in sample movie data")
        return pd.DataFrame([
            {
                "context": "Title: The Dark Knight\nPlot: Batman faces the Joker...\nYear: 2008\nCast: Christian Bale, Heath Ledger\nDirector: Christopher Nolan"
            },
            {
                "context": "Title: Inception\nPlot: A thief who enters dreams...\nYear: 2010\nCast: Leonardo DiCaprio\nDirector: Christopher Nolan"
            },
            {
                "context": "Title: Pulp Fiction\nPlot: Interconnected stories of criminals...\nYear: 1994\nCast: John Travolta\nDirector: Quentin Tarantino"
            }
        ])
@st.cache_resource
def setup_retrieval(df):
    """Embed all contexts and build a flat L2 FAISS index over them."""
    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = embedder.encode(df['context'].tolist())
    embeddings = np.asarray(embeddings, dtype="float32")  # FAISS expects float32 vectors
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return embedder, index
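# A minimal retrieval sketch (reference only; mirrors what main() does below,
# using a hypothetical query string):
#
#   embedder, index = setup_retrieval(df)
#   query_vec = embedder.encode(["Who directed The Dark Knight?"])
#   _, indices = index.search(query_vec, 3)                  # indices of the 3 nearest contexts
#   contexts = [df.iloc[i]['context'] for i in indices[0]]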
# --------------------------
# Groq API Functions
# --------------------------
def get_groq_response(query, context):
    """Ask the Groq-hosted Llama 3 model to answer `query` using the retrieved `context`."""
    try:
        # The API key must be supplied via the GROQ_API_KEY environment variable;
        # never hard-code secrets in source files.
        client = Groq(api_key=os.getenv("GROQ_API_KEY"))
        prompt = f"""You are a film expert analyzing this question:
Question: {query}
Using these verified sources:
{context}
Provide a detailed response with:
1. 🎬 Direct Answer
2. 📖 Explanation
3. 🎥 Relevant Scenes
4. 🏆 Awards/Trivia (if available)
"""
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-70b-8192",
            temperature=0.3
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error getting response: {str(e)}"
# --------------------------
# Main Application
# --------------------------
def main():
    # Load data and models
    df = load_movie_data()
    embedder, index = setup_retrieval(df)

    # Header Section
    st.markdown("""
    <div class="header">
        <h1>🎞️ CineMaster AI</h1>
        <h3>Your Personal Movie Encyclopedia</h3>
    </div>
    """, unsafe_allow_html=True)

    # Sidebar
    with st.sidebar:
        st.image("https://cdn-icons-png.flaticon.com/512/2598/2598702.png", width=120)
        st.subheader("Sample Questions")
        examples = [
            "Who played the Joker in The Dark Knight?",
            "Explain the ending of Inception",
            "List Tarantino's movies",
            "What's the plot of Pulp Fiction?",
            "Who directed The Dark Knight?"
        ]
        for ex in examples:
            st.code(ex, language="bash")
        st.markdown("---")
        st.markdown("**Database Info**")
        st.write(f"📊 {len(df)} movies loaded")
        st.write("🔍 Using FAISS for vector search")
        st.write("🤖 Powered by Llama 3 70B")

    # Main Interface
    query = st.text_input("🎯 Ask any movie question:",
                          placeholder="e.g., 'Who played the villain in The Dark Knight?'")

    if st.button("🚀 Get Expert Analysis", type="primary"):
        if query:
            with st.spinner("🔍 Searching through movie database..."):
                query_embed = embedder.encode([query])
                _, indices = index.search(query_embed, 3)
                contexts = [df.iloc[i]['context'] for i in indices[0]]
                combined_context = "\n\n---\n\n".join(contexts)

            with st.spinner("🎥 Generating cinematic insights..."):
                answer = get_groq_response(query, combined_context)

            st.markdown("---")
            with st.container():
                st.markdown("## 🎬 Expert Analysis")
                st.markdown(f'<div class="response-box">{answer}</div>', unsafe_allow_html=True)

                st.markdown("## 📚 Reference Materials")
                for i, ctx in enumerate(contexts, 1):
                    with st.expander(f"Source {i}", expanded=(i==1)):
                        st.markdown(f'<div class="movie-card">{ctx}</div>', unsafe_allow_html=True)
        else:
            st.warning("Please enter a movie-related question")
if __name__ == "__main__":
    main()