"""CineMaster AI: a Streamlit app that answers movie questions.

The app embeds a small corpus of movie text with a sentence-transformer,
retrieves the nearest passages with FAISS, and asks a Groq-hosted chat model
to answer the user's question using those passages as context.
"""

import logging
import os

import faiss
import numpy as np
import pandas as pd
import streamlit as st
from datasets import load_dataset
from groq import Groq
from sentence_transformers import SentenceTransformer

logger = logging.getLogger(__name__)

# Number of passages retrieved from the index for each question.
TOP_K = 3

# Header text rendered at the top of the page.
HEADER_MARKDOWN = "\n\n🎞️ CineMaster AI\n\nYour Personal Movie Encyclopedia\n\n"

# --------------------------
# Configuration & Styling
# --------------------------
st.set_page_config(
    page_title="CineMaster AI - Movie Expert",
    page_icon="🎬",
    layout="wide",
    initial_sidebar_state="expanded",
)

st.markdown(""" """, unsafe_allow_html=True)


# --------------------------
# Data Loading & Processing
# --------------------------
@st.cache_resource
def load_movie_data():
    """Return a DataFrame with a ``context`` column of retrievable passages.

    Tries to load the ``facebook/wiki_movies`` training split and builds each
    passage from its ``question`` and ``answer`` columns. If anything in that
    path raises, the failure is logged, a warning is shown in the UI, and a
    three-row built-in sample is returned instead.

    Returns:
        pandas.DataFrame: one row per passage, with at least a ``context``
        column of strings.
    """
    try:
        # NOTE(review): trust_remote_code=True lets the dataset repository
        # execute its own loading script on this machine. Confirm this is
        # acceptable for the deployment environment.
        dataset = load_dataset(
            "facebook/wiki_movies",
            split="train",
            trust_remote_code=True,  # Explicitly allow trusted code
        )
        df = pd.DataFrame(dataset)
        df['context'] = (
            "Question: " + df['question'].str.strip() + "\n"
            + "Answer: " + df['answer'].str.strip()
        )
        return df
    except Exception:
        # Best-effort load: keep the app usable, but record why we fell back
        # instead of discarding the exception.
        logger.exception(
            "Could not load facebook/wiki_movies; using built-in sample data"
        )
        st.warning("Using high-quality synthetic movie data")
        return pd.DataFrame([
            {
                "context": "Title: The Dark Knight\nPlot: Batman faces the Joker...\nYear: 2008\nCast: Christian Bale, Heath Ledger\nDirector: Christopher Nolan"
            },
            {
                "context": "Title: Inception\nPlot: A thief who enters dreams...\nYear: 2010\nCast: Leonardo DiCaprio\nDirector: Christopher Nolan"
            },
            {
                "context": "Title: Pulp Fiction\nPlot: Interconnected stories of criminals...\nYear: 1994\nCast: John Travolta\nDirector: Quentin Tarantino"
            },
        ])


@st.cache_resource
def setup_retrieval(df):
    """Build the sentence embedder and an L2 FAISS index over ``df['context']``.

    Args:
        df: DataFrame with a ``context`` column of strings.

    Returns:
        tuple: ``(embedder, index)`` where ``embedder`` is the
        ``all-MiniLM-L6-v2`` SentenceTransformer and ``index`` is a
        ``faiss.IndexFlatL2`` holding one vector per row of ``df``, in row
        order.
    """
    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    # FAISS only accepts C-contiguous float32 matrices; coerce explicitly
    # rather than relying on the encoder's default output dtype.
    embeddings = np.ascontiguousarray(
        embedder.encode(df['context'].tolist()), dtype="float32"
    )
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return embedder, index


def _retrieve_contexts(query, df, embedder, index, k=TOP_K):
    """Return up to ``k`` passages from ``df`` nearest to ``query``."""
    # Never ask for more neighbours than there are rows: FAISS pads missing
    # results with -1, and df.iloc[-1] would silently return the last row.
    k = min(k, len(df))
    if k <= 0:
        return []
    query_embed = np.ascontiguousarray(embedder.encode([query]), dtype="float32")
    _, indices = index.search(query_embed, k)
    return [df.iloc[int(i)]['context'] for i in indices[0] if i >= 0]


# --------------------------
# Groq API Functions
# --------------------------
def get_groq_response(query, context):
    """Ask the Groq chat model to answer ``query`` using ``context``.

    The API key is read from the ``GROQ_API_KEY`` environment variable. No
    fallback key is embedded in the source; any key that was previously
    committed here should be treated as compromised and rotated.

    Args:
        query: The user's question.
        context: Retrieved passages, already joined into one string.

    Returns:
        str: The model's answer, or a message starting with
        ``"Error getting response: "`` if the key is missing or the request
        fails. This function does not raise.
    """
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        return "Error getting response: GROQ_API_KEY environment variable is not set"

    prompt = f"""You are a film expert analyzing this question:

Question: {query}

Using these verified sources:
{context}

Provide a detailed response with:
1. 🎬 Direct Answer
2. 📖 Explanation
3. 🎥 Relevant Scenes
4. 🏆 Awards/Trivia (if available)
"""

    # Broad catch is deliberate: this is the UI boundary and the caller
    # displays whatever string comes back.
    try:
        client = Groq(api_key=api_key)
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-70b-8192",
            temperature=0.3,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error getting response: {str(e)}"


# --------------------------
# Main Application
# --------------------------
def main():
    """Render the page: header, sidebar, question box, answer and sources."""
    # Load data and models
    df = load_movie_data()
    embedder, index = setup_retrieval(df)

    # Header Section
    st.markdown(HEADER_MARKDOWN, unsafe_allow_html=True)

    # Sidebar
    with st.sidebar:
        st.image("https://cdn-icons-png.flaticon.com/512/2598/2598702.png", width=120)
        st.subheader("Sample Questions")
        examples = [
            "Who played the Joker in The Dark Knight?",
            "Explain the ending of Inception",
            "List Tarantino's movies",
            "What's the plot of Pulp Fiction?",
            "Who directed The Dark Knight?",
        ]
        for ex in examples:
            st.code(ex, language="bash")
        st.markdown("---")
        st.markdown("**Database Info**")
        st.write(f"📊 {len(df)} movies loaded")
        st.write("🔍 Using FAISS for vector search")
        st.write("🤖 Powered by Llama 3 70B")

    # Main Interface
    query = st.text_input(
        "🎯 Ask any movie question:",
        placeholder="e.g., 'Who played the villain in The Dark Knight?'",
    )

    if not st.button("🚀 Get Expert Analysis", type="primary"):
        return

    # Treat a whitespace-only question the same as an empty one.
    query = query.strip() if query else ""
    if not query:
        st.warning("Please enter a movie-related question")
        return

    with st.spinner("🔍 Searching through movie database..."):
        contexts = _retrieve_contexts(query, df, embedder, index)
        combined_context = "\n\n---\n\n".join(contexts)

    with st.spinner("🎥 Generating cinematic insights..."):
        answer = get_groq_response(query, combined_context)

    st.markdown("---")
    with st.container():
        st.markdown("## 🎬 Expert Analysis")
        # Model output and dataset text are untrusted, so they are rendered
        # as plain Markdown and not with unsafe_allow_html=True.
        st.markdown(answer)

        st.markdown("## 📚 Reference Materials")
        for i, ctx in enumerate(contexts, 1):
            with st.expander(f"Source {i}", expanded=(i == 1)):
                st.markdown(ctx)


if __name__ == "__main__":
    main()