Spaces:

iisadia
/

Movie_Buff_QA

Sleeping

App Files Files Community

iisadia committed on Apr 13

Commit

740f48a

verified ·

1 Parent(s): 101edbc

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -47

app.py CHANGED Viewed

@@ -30,6 +30,7 @@ st.markdown("""
         border-radius: 15px;
         text-align: center;
         box-shadow: 0 4px 6px rgba(0,0,0,0.1);
     }
     .response-box {
         background: rgba(255,255,255,0.1);
@@ -49,19 +50,47 @@ st.markdown("""
     .stButton>button:hover {
         transform: scale(1.05);
     }
 </style>
 """, unsafe_allow_html=True)
 # --------------------------
-# Movie Dataset & Embeddings
 # --------------------------
-# Replace load_movie_data() with:
 @st.cache_resource
 def load_movie_data():
-    dataset = load_dataset("wiki_movies", split="train")
-    df = pd.DataFrame(dataset)
-    df['context'] = df.apply(lambda x: f"Title: {x['title']}\nPlot: {x['plot']}\nCast: {x['cast']}", axis=1)
-    return df
 @st.cache_resource
 def setup_retrieval(df):
@@ -73,38 +102,40 @@ def setup_retrieval(df):
     return embedder, index
 # --------------------------
-# Groq API Setup
 # --------------------------
-def get_groq_client():
-    return Groq(
-        api_key=os.getenv("GROQ_API_KEY", "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")
-    )
-def movie_expert(query, context):
-    prompt = f"""You are a film expert. Answer using this context:
-    {context}
-    Question: {query}
-    Format response with:
-    1. 🎥 Direct Answer
-    2. 📖 Detailed Explanation
-    3. 🏆 Key Cast Members
-    4. 🌟 Trivia (if available)
-    """
-    response = client.chat.completions.create(
-        messages=[{"role": "user", "content": prompt}],
-        model="llama3-70b-8192",
-        temperature=0.3
-    )
-    return response.choices[0].message.content
 # --------------------------
 # Main Application
 # --------------------------
 def main():
     df = load_movie_data()
     embedder, index = setup_retrieval(df)
@@ -122,40 +153,44 @@ def main():
         st.subheader("Sample Questions")
         examples = [
             "Who played the Joker in The Dark Knight?",
-            "What's the plot of Inception?",
-            "List Christopher Nolan's movies",
-            "Who directed The Dark Knight?",
-            "What year was Inception released?"
         ]
         for ex in examples:
             st.code(ex, language="bash")
     # Main Interface
     query = st.text_input("🎯 Ask any movie question:",
                         placeholder="e.g., 'Who played the villain in The Dark Knight?'")
-    if st.button("🚀 Get Answer"):
         if query:
-            with st.spinner("🔍 Searching through 10,000+ movie records..."):
                 query_embed = embedder.encode([query])
-                _, indices = index.search(query_embed, 2)
                 contexts = [df.iloc[i]['context'] for i in indices[0]]
-                combined_context = "\n\n".join(contexts)
             with st.spinner("🎥 Generating cinematic insights..."):
-                answer = movie_expert(query, combined_context)
             st.markdown("---")
             with st.container():
                 st.markdown("## 🎬 Expert Analysis")
                 st.markdown(f'<div class="response-box">{answer}</div>', unsafe_allow_html=True)
-                st.markdown("## 📚 Source Materials")
-                cols = st.columns(2)
-                for i, ctx in enumerate(contexts):
-                    with cols[i]:
-                        with st.expander(f"Source {i+1}", expanded=True):
-                            st.write(ctx)
         else:
             st.warning("Please enter a movie-related question")

         border-radius: 15px;
         text-align: center;
         box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+        margin-bottom: 2rem;
     }
     .response-box {
         background: rgba(255,255,255,0.1);
     .stButton>button:hover {
         transform: scale(1.05);
     }
+    .movie-card {
+        background: rgba(0,0,0,0.2);
+        border-radius: 10px;
+        padding: 1rem;
+        margin: 0.5rem 0;
+    }
 </style>
 """, unsafe_allow_html=True)
 # --------------------------
+# Data Loading & Processing
 # --------------------------
 @st.cache_resource
 def load_movie_data():
+    """Return the DataFrame of passages used for retrieval.
+
+    Tries to load the first 5,000 rows of the English Wikipedia dump and
+    returns a random 1,000-row sample whose `context` column is built from
+    each article's title and the first 500 characters of its text. If any
+    step in that path raises, a warning is shown and a DataFrame of three
+    hard-coded movie entries (`title`, `context`) is returned instead.
+
+    NOTE(review): the Wikipedia slice is not filtered to film articles, so
+    the rows are general encyclopedia entries -- confirm this is intended.
+    """
+    try:
+        # Load a 5,000-article slice of English Wikipedia (this is not the
+        # wiki_movies dataset)
+        dataset = load_dataset("wikipedia", "20220301.en", split="train[:5000]")
+        df = pd.DataFrame(dataset)
+        # Build retrieval passages from the article titles and text snippets
+        df['title'] = df['title'].apply(lambda x: x.replace("_", " "))
+        df['context'] = "Title: " + df['title'] + "\nContent: " + df['text'].str[:500] + "..."
+        return df.sample(1000)  # Return random 1000 entries
+    except Exception as e:
+        # Any failure above (download, parsing, sampling) lands here and the
+        # app continues with the small built-in list below.
+        st.warning(f"Couldn't load dataset: {str(e)}. Using synthetic data.")
+        movies = [
+            {
+                "title": "The Dark Knight",
+                "context": "Title: The Dark Knight\nPlot: Batman faces the Joker in a battle for Gotham's soul...\nCast: Christian Bale, Heath Ledger\nYear: 2008\nDirector: Christopher Nolan"
+            },
+            {
+                "title": "Inception",
+                "context": "Title: Inception\nPlot: A thief who enters the dreams of others...\nCast: Leonardo DiCaprio, Tom Hardy\nYear: 2010\nDirector: Christopher Nolan"
+            },
+            {
+                "title": "Pulp Fiction",
+                "context": "Title: Pulp Fiction\nPlot: The lives of two mob hitmen, a boxer, and a gangster's wife intertwine...\nCast: John Travolta, Samuel L. Jackson\nYear: 1994\nDirector: Quentin Tarantino"
+            }
+        ]
+        return pd.DataFrame(movies)
 @st.cache_resource
 def setup_retrieval(df):
     return embedder, index
 # --------------------------
+# Groq API Functions
 # --------------------------
+def get_groq_response(query, context):
+    try:
+        client = Groq(api_key=os.getenv("GROQ_API_KEY", "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"))
+        prompt = f"""You are a film expert analyzing this question:
+        Question: {query}
+        Using these verified sources:
+        {context}
+        Provide a detailed response with:
+        1. 🎬 Direct Answer
+        2. 📖 Explanation
+        3. 🎥 Relevant Scenes
+        4. 🏆 Awards/Trivia (if available)
+        """
+        response = client.chat.completions.create(
+            messages=[{"role": "user", "content": prompt}],
+            model="llama3-70b-8192",
+            temperature=0.3
+        )
+        return response.choices[0].message.content
+    except Exception as e:
+        return f"Error getting response: {str(e)}"
 # --------------------------
 # Main Application
 # --------------------------
 def main():
+    # Load data and models
     df = load_movie_data()
     embedder, index = setup_retrieval(df)
         st.subheader("Sample Questions")
         examples = [
             "Who played the Joker in The Dark Knight?",
+            "Explain the ending of Inception",
+            "List Tarantino's movies",
+            "What's the plot of Pulp Fiction?",
+            "Who directed The Dark Knight?"
         ]
         for ex in examples:
             st.code(ex, language="bash")
+        st.markdown("---")
+        st.markdown("**Database Info**")
+        st.write(f"📊 {len(df)} movies loaded")
+        st.write("🔍 Using FAISS for vector search")
+        st.write("🤖 Powered by Llama 3 70B")
     # Main Interface
     query = st.text_input("🎯 Ask any movie question:",
                         placeholder="e.g., 'Who played the villain in The Dark Knight?'")
+    if st.button("🚀 Get Expert Analysis", type="primary"):
         if query:
+            with st.spinner("🔍 Searching through movie database..."):
                 query_embed = embedder.encode([query])
+                _, indices = index.search(query_embed, 3)
                 contexts = [df.iloc[i]['context'] for i in indices[0]]
+                combined_context = "\n\n---\n\n".join(contexts)
             with st.spinner("🎥 Generating cinematic insights..."):
+                answer = get_groq_response(query, combined_context)
             st.markdown("---")
             with st.container():
                 st.markdown("## 🎬 Expert Analysis")
                 st.markdown(f'<div class="response-box">{answer}</div>', unsafe_allow_html=True)
+                st.markdown("## 📚 Reference Materials")
+                for i, ctx in enumerate(contexts, 1):
+                    with st.expander(f"Source {i}", expanded=(i==1)):
+                        st.markdown(f'<div class="movie-card">{ctx}</div>', unsafe_allow_html=True)
         else:
             st.warning("Please enter a movie-related question")