Spaces:

krisha06
/

RAG

Sleeping

App Files Files Community

krisha06 committed on Mar 24

Commit

23b09ce

verified ·

1 Parent(s): 8f45aef

Update app.py

Browse files

Files changed (1) hide show

app.py +170 -60

app.py CHANGED Viewed

@@ -1,71 +1,181 @@
 import streamlit as st
-from datasets import load_dataset
-from sentence_transformers import SentenceTransformer
 import chromadb
-# Load dataset
-# Load dataset
 def load_recipes():
     try:
-        dataset = load_dataset("mbien/recipe_nlg", split="train", trust_remote_code=True)
-        print("✅ Dataset loaded successfully!")
-        return dataset
     except Exception as e:
-        print(f"❌ Error loading dataset: {e}")
-        return None
 recipes_df = load_recipes()
-if recipes_df is None:
-    st.error("❌ Failed to load dataset! Check internet or dataset availability.")
-    st.stop()  # Stops Streamlit from running further if the dataset isn't loaded
-# Load embedding model
 @st.cache_resource
 def load_embedding_model():
-    return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-embed_model = load_embedding_model()
-# Initialize ChromaDB
-chroma_client = chromadb.PersistentClient(path="./chroma_db")  # Saves vectors
-recipe_collection = chroma_client.get_or_create_collection(name="recipes")
-# Ensure recipes_df is iterable
-if isinstance(recipes_df, list) or isinstance(recipes_df, dict):
-    if recipe_collection.count() == 0:
-        st.info("Indexing recipes... This will take a few minutes.")
-        for i, recipe in enumerate(recipes_df):
-            title = recipe.get("title", "Unknown Title")  # Handle missing keys
-            ingredients = ", ".join(recipe.get("ingredients", []))
-            instructions = recipe.get("instructions", "No instructions available")
-            embedding = embed_model.encode(title).tolist()
-            recipe_collection.add(
-                ids=[str(i)],
-                embeddings=[embedding],
-                metadatas=[{"title": title, "ingredients": ingredients, "index": i}],
-            )
-else:
-    st.error("❌ Dataset is not in the correct format!")
-# UI
-st.title("🍽️ AI Recipe Finder with ChromaDB RAG")
-query = st.text_input("🔍 Search for a recipe (e.g., pasta, cake)")
-if query:
-    query_embedding = embed_model.encode(query).tolist()
-    results = recipe_collection.query(
-        query_embeddings=[query_embedding], n_results=5
-    )
-    st.subheader("🔎 Most relevant recipes:")
-    for result in results["metadatas"][0]:
-        index = result["index"]
-        recipe = recipes_df[index]
-        st.write(f"**🍴 {recipe.get('title', 'No title available')}**")
-        st.write(f"**Ingredients:** {', '.join(recipe.get('ingredients', []))}")
-        st.write(f"**Instructions:** {recipe.get('instructions', 'No instructions available')}")
-        st.write("---")
-else:
-    st.info("Type a recipe name to find similar recipes.")

 import streamlit as st
+import pandas as pd
 import chromadb
+from sentence_transformers import SentenceTransformer
+from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
+from PIL import Image
+from io import BytesIO
+import requests
+# --- 1. Load Recipes Dataset ---
+@st.cache_data
 def load_recipes():
     try:
+        recipes_df = pd.read_csv("recipes.csv")
+        recipes_df = recipes_df.rename(columns={"recipe_name": "title", "directions": "instructions"})
+        recipes_df = recipes_df[['title', 'ingredients', 'instructions', 'img_src']]
+        recipes_df.fillna("", inplace=True)
+        recipes_df["ingredients"] = recipes_df["ingredients"].str.lower().str.replace(r'[^\w\s]', '', regex=True)
+        recipes_df["combined_text"] = recipes_df["title"] + " " + recipes_df["ingredients"]
+        return recipes_df
     except Exception as e:
+        st.error(f"⚠ Error loading recipes: {e}")
+        return pd.DataFrame()
 recipes_df = load_recipes()
+# --- 2. Load SentenceTransformer Model ---
 @st.cache_resource
 def load_embedding_model():
+    return SentenceTransformer("all-mpnet-base-v2")
+embedding_model = load_embedding_model()
+# --- 3. Initialize ChromaDB ---
+# Client backed by the ./chroma_db directory; presumably the stored vectors
+# survive app restarts (PersistentClient) -- confirm against the chromadb docs.
+chroma_client = chromadb.PersistentClient(path="./chroma_db")
+# Reuse the "recipe_collection" collection when it exists, otherwise create it.
+collection = chroma_client.get_or_create_collection(name="recipe_collection")
+# --- 4. Generate & Store Embeddings ---
+def get_sentence_transformer_embeddings(text):
+    return embedding_model.encode(text).tolist()
+try:
+    existing_data = collection.get()
+    existing_ids = set(existing_data["ids"]) if existing_data and "ids" in existing_data else set()
+except Exception as e:
+    st.error(f"⚠ ChromaDB Error: {e}")
+    existing_ids = set()
+for index, row in recipes_df.iterrows():
+    recipe_id = str(index)
+    if recipe_id in existing_ids:
+        continue
+    embedding = get_sentence_transformer_embeddings(row["combined_text"])
+    if embedding:
+        collection.add(embeddings=[embedding], documents=[row["combined_text"]], ids=[recipe_id])
+# --- 5. Retrieve Similar Recipes ---
+def retrieve_recipes(query, top_k=3):
+    query_embedding = get_sentence_transformer_embeddings(query)
+    results = collection.query(query_embeddings=[query_embedding], n_results=top_k)
+    if results and "ids" in results and results["ids"]:  # Check existence before accessing
+        recipe_indices = [int(id) for id in results["ids"][0] if id.isdigit()]
+        return recipes_df.iloc[recipe_indices] if recipe_indices else None
+    return None
+# --- 6. Load a Compatible LLM for Q&A ---
+@st.cache_resource
+def load_llm_model():
+    tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")  # Better Q&A model
+    model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")
+    return pipeline("question-answering", model=model, tokenizer=tokenizer)
+llm_model = load_llm_model()
+# --- 5. Answer Greeting and Handle Q&A Queries ---
+def answer_question(query, context=""):
+    # Handle greetings or non-informational queries
+    greetings = ["hi", "hello", "hii", "hey", "greetings", "how are you", "what's up", "how's it going"]
+    if query.lower().strip() in greetings:
+        return "Hello! How can I assist you today? Feel free to ask about recipes or any other questions."
+    # If not a greeting, check if it is a valid Q&A query
+    if query.lower().strip() not in greetings:
+        # Use the QA model for other questions
+        response = qa_model(question=query, context=context)
+        # Check if the response from the model is valid
+        if response and "answer" in response and response["answer"].strip():
+            return response["answer"]
+        else:
+            return "I'm sorry, I couldn't generate a response for your query."
+    return None
+# --- 6. Classify Query Type (Q&A or Recipe Search) ---
+@st.cache_resource
+def load_classifier():
+    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+classifier = load_classifier()
+def classify_query(query):
+    # Keywords that may indicate a recipe-related query
+    recipe_keywords = ["make", "cook", "bake", "recipe", "prepare"]
+    # Check if query contains common recipe-related keywords
+    if any(keyword in query.lower() for keyword in recipe_keywords):
+        return "Recipe Search"
+    labels = ["Q&A", "Recipe Search"]
+    result = classifier(query, labels)
+    return result["labels"][0]
+# --- 8. Display Image Function ---
+def display_image(image_url, recipe_name):
+    try:
+        if not isinstance(image_url, str) or not image_url.startswith("http"):
+            raise ValueError("Invalid or missing image URL")
+        response = requests.get(image_url, timeout=5)
+        response.raise_for_status()
+        image = Image.open(BytesIO(response.content))
+        st.image(image, caption=recipe_name, use_container_width=True)
+    except requests.exceptions.RequestException as e:
+        st.warning(f"⚠ Image fetch error: {e}")
+        placeholder_url = "https://via.placeholder.com/300?text=No+Image"
+        st.image(placeholder_url, caption=recipe_name, use_container_width=True)
+# --- Streamlit UI ---
+# Page body: one text box plus an "Ask AI" button. A greeting is answered
+# directly; any other query is classified and routed to Q&A or recipe search.
+st.title("🍽️ AI Recipe & Q&A Assistant")
+# Unique key for the main user query input
+user_query = st.text_input("Enter your question or recipe search query:", "", key="main_query_input")
+# Use session state to store the retrieved recipe
+# NOTE(review): this key is written below but never read in the visible code.
+if "retrieved_recipes" not in st.session_state:
+    st.session_state["retrieved_recipes"] = None
+if st.button("Ask AI"):
+    if user_query:
+        # Handle greetings and other specific queries with answer_question
+        # NOTE(review): called without a context, so every non-greeting query
+        # also runs answer_question's Q&A branch before it is classified.
+        response = answer_question(user_query)
+        # The greeting reply is recognised by its "Hello!" text.
+        if response and "Hello!" in response:
+            st.subheader("🤖 AI Answer:")
+            st.write(response)
+        else:
+            # Classify the query if not a greeting
+            intent = classify_query(user_query)
+            if intent == "Q&A":
+                st.subheader("🤖 AI Answer:")
+                # NOTE(review): placeholder text is used as the Q&A context, so
+                # answers can only be drawn from this sentence -- confirm intent.
+                context = "You can add specific context here, or leave it empty."
+                response = answer_question(user_query, context)
+                st.write(response)
+            elif intent == "Recipe Search":
+                retrieved_recipes = retrieve_recipes(user_query)
+                if retrieved_recipes is not None and not retrieved_recipes.empty:
+                    st.session_state["retrieved_recipes"] = retrieved_recipes  # Store retrieved recipes in session state
+                    st.subheader("🍴 Found Recipes:")
+                    for index, recipe in retrieved_recipes.iterrows():
+                        st.markdown(f"### {recipe['title']}")
+                        st.write(f"**Ingredients:** {recipe['ingredients']}")
+                        st.write(f"**Instructions:** {recipe['instructions']}")
+                        display_image(recipe.get('img_src', ''), recipe['title'])
+                        # Unique key for each follow-up question input
+                        follow_up_query = st.text_input(
+                            "Any modifications or follow-up questions about this recipe?",
+                            key=f"follow_up_query_{index}"
+                        )
+                        # NOTE(review): this button sits inside the "Ask AI" branch.
+                        # Presumably st.button is True only on the rerun caused by
+                        # its own click, in which case this branch is not rendered
+                        # and the follow-up never fires -- confirm against the
+                        # Streamlit docs.
+                        if st.button(f"Submit Follow-up for {recipe['title']}", key=f"submit_follow_up_{index}"):
+                            # Handle follow-up query
+                            # NOTE(review): handle_follow_up is not defined in the
+                            # visible part of this file -- verify it exists.
+                            response = handle_follow_up(follow_up_query, recipe)
+                            st.write(response)
+                else:
+                    st.warning("⚠️ No relevant recipes found.")
+            else:
+                st.warning("❌ Unable to classify the query.")