Spaces:

Eniyan
/

RagLMM

Sleeping

App Files Files Community

Tamil Eniyan committed on Feb 15

Commit

076c725

1 Parent(s): ae479fd

Add application file

Browse files

Files changed (4) hide show

app.py +63 -2
chunks.pkl +3 -0
faiss_index.index +0 -0
requirements.txt +8 -0

app.py CHANGED Viewed

@@ -1,4 +1,65 @@
 import streamlit as st
-x = st.slider('Select a value')
-st.write(x, 'squared is', x * x)

 import streamlit as st
+import faiss
+import numpy as np
+import pickle
+from sentence_transformers import SentenceTransformer
+from transformers import pipeline
+# File names for saved data and identifiers of the models to load
+INDEX_FILE = "faiss_index.index"
+CHUNKS_FILE = "chunks.pkl"
+EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
+QA_MODEL_NAME = "deepset/roberta-base-squad2"  # You can change this to any Hugging Face QA model
+@st.cache_resource
+def load_index_and_chunks():
+    index = faiss.read_index(INDEX_FILE)
+    with open(CHUNKS_FILE, "rb") as f:
+        chunks = pickle.load(f)
+    return index, chunks
+@st.cache_resource
+def load_embedding_model():
+    return SentenceTransformer(EMBEDDING_MODEL_NAME)
+@st.cache_resource
+def load_qa_pipeline():
+    # This QA pipeline expects a question and a context
+    return pipeline("question-answering", model=QA_MODEL_NAME, tokenizer=QA_MODEL_NAME)
+def main():
+    st.title("PDF Question-Answering App")
+    # Load FAISS index, chunks, and models
+    index, chunks = load_index_and_chunks()
+    embed_model = load_embedding_model()
+    qa_pipeline = load_qa_pipeline()
+    st.write("Enter your question about the PDF document:")
+    query = st.text_input("Question:")
+    if query:
+        # Encode the query using the same SentenceTransformer model
+        query_embedding = embed_model.encode([query]).astype('float32')
+        # Retrieve top k relevant chunks
+        k = 3
+        distances, indices = index.search(query_embedding, k)
+        st.subheader("Retrieved Context:")
+        context = ""
+        for idx in indices[0]:
+            context_piece = chunks[idx]
+            context += context_piece + " "
+            st.write(context_piece)
+        st.subheader("Answer:")
+        try:
+            # Use the QA pipeline to generate an answer based on the combined context
+            result = qa_pipeline(question=query, context=context)
+            st.write(result["answer"])
+        except Exception as e:
+            st.error(f"Error generating answer: {e}")
+if __name__ == "__main__":  # only when executed as a script, not on import
+    main()  # build and render the question-answering page

chunks.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7c1ab50a97f033917bd851977a80740d1571050d23783ded500413d58dc9c3e
+size 9141

faiss_index.index ADDED Viewed

Binary file (4.65 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit
+faiss-cpu
+numpy
+sentence-transformers
+transformers
+PyPDF2
+nltk
+torch