import streamlit as st import pandas as pd import faiss import numpy as np from datasets import load_dataset from sentence_transformers import SentenceTransformer from groq import Groq import os # -------------------------- # Configuration & Styling # -------------------------- st.set_page_config( page_title="CineMaster AI - Movie Expert", page_icon="🎬", layout="wide", initial_sidebar_state="expanded" ) st.markdown(""" """, unsafe_allow_html=True) # -------------------------- # Data Loading & Processing # -------------------------- @st.cache_resource def load_movie_data(): # Option 1: Try loading with trust_remote_code try: dataset = load_dataset( "facebook/wiki_movies", split="train", trust_remote_code=True # Explicitly allow trusted code ) df = pd.DataFrame(dataset) df['context'] = "Question: " + df['question'].str.strip() + "\n" + \ "Answer: " + df['answer'].str.strip() return df except Exception as e: # Option 2: Fallback to synthetic data st.warning("Using high-quality synthetic movie data") return pd.DataFrame([ { "context": "Title: The Dark Knight\nPlot: Batman faces the Joker...\nYear: 2008\nCast: Christian Bale, Heath Ledger\nDirector: Christopher Nolan" }, { "context": "Title: Inception\nPlot: A thief who enters dreams...\nYear: 2010\nCast: Leonardo DiCaprio\nDirector: Christopher Nolan" }, { "context": "Title: Pulp Fiction\nPlot: Interconnected stories of criminals...\nYear: 1994\nCast: John Travolta\nDirector: Quentin Tarantino" } ]) @st.cache_resource def setup_retrieval(df): embedder = SentenceTransformer('all-MiniLM-L6-v2') embeddings = embedder.encode(df['context'].tolist()) index = faiss.IndexFlatL2(embeddings.shape[1]) index.add(embeddings) return embedder, index # -------------------------- # Groq API Functions # -------------------------- def get_groq_response(query, context): try: client = Groq(api_key=os.getenv("GROQ_API_KEY", "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")) prompt = f"""You are a film expert analyzing this question: Question: {query} Using these verified sources: {context} Provide a detailed response with: 1. 🎬 Direct Answer 2. 📖 Explanation 3. 🎥 Relevant Scenes 4. 🏆 Awards/Trivia (if available) """ response = client.chat.completions.create( messages=[{"role": "user", "content": prompt}], model="llama3-70b-8192", temperature=0.3 ) return response.choices[0].message.content except Exception as e: return f"Error getting response: {str(e)}" # -------------------------- # Main Application # -------------------------- def main(): # Load data and models df = load_movie_data() embedder, index = setup_retrieval(df) # Header Section st.markdown("""