Tamil Eniyan committed
Commit 076c725 · 1 Parent(s): ae479fd

Add application file

Files changed (4)
  1. app.py +63 -2
  2. chunks.pkl +3 -0
  3. faiss_index.index +0 -0
  4. requirements.txt +8 -0
app.py CHANGED
@@ -1,4 +1,65 @@
 import streamlit as st
+import faiss
+import numpy as np
+import pickle
+from sentence_transformers import SentenceTransformer
+from transformers import pipeline
 
-x = st.slider('Select a value')
-st.write(x, 'squared is', x * x)
+# File names for saved data
+INDEX_FILE = "faiss_index.index"
+CHUNKS_FILE = "chunks.pkl"
+EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
+QA_MODEL_NAME = "deepset/roberta-base-squad2"  # You can change this to any Hugging Face QA model
+
+@st.cache_resource
+def load_index_and_chunks():
+    index = faiss.read_index(INDEX_FILE)
+    with open(CHUNKS_FILE, "rb") as f:
+        chunks = pickle.load(f)
+    return index, chunks
+
+@st.cache_resource
+def load_embedding_model():
+    return SentenceTransformer(EMBEDDING_MODEL_NAME)
+
+@st.cache_resource
+def load_qa_pipeline():
+    # This QA pipeline expects a question and a context
+    return pipeline("question-answering", model=QA_MODEL_NAME, tokenizer=QA_MODEL_NAME)
+
+def main():
+    st.title("PDF Question-Answering App")
+
+    # Load FAISS index, chunks, and models
+    index, chunks = load_index_and_chunks()
+    embed_model = load_embedding_model()
+    qa_pipeline = load_qa_pipeline()
+
+    st.write("Enter your question about the PDF document:")
+    query = st.text_input("Question:")
+
+    if query:
+        # Encode the query using the same SentenceTransformer model
+        query_embedding = embed_model.encode([query]).astype('float32')
+
+        # Retrieve top k relevant chunks
+        k = 3
+        distances, indices = index.search(query_embedding, k)
+
+        st.subheader("Retrieved Context:")
+        context = ""
+        for idx in indices[0]:
+            context_piece = chunks[idx]
+            context += context_piece + " "
+            st.write(context_piece)
+
+        st.subheader("Answer:")
+        try:
+            # Use the QA pipeline to generate an answer based on the combined context
+            result = qa_pipeline(question=query, context=context)
+            st.write(result["answer"])
+        except Exception as e:
+            st.error(f"Error generating answer: {e}")
+
+if __name__ == "__main__":
+    main()
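
Note: the pipeline used above is extractive, so the answer is a span copied out of the retrieved context rather than generated text. For reference, a standalone sketch of the same call outside Streamlit; the question and context strings here are purely illustrative, not from the commit:

from transformers import pipeline

# Same extractive QA model as app.py
qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
result = qa(
    question="What is FAISS?",
    context="FAISS is a library for efficient similarity search over dense vectors.",
)
# result is a dict with "answer", "score", "start", and "end"
print(result["answer"], result["score"])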
chunks.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7c1ab50a97f033917bd851977a80740d1571050d23783ded500413d58dc9c3e
+size 9141
faiss_index.index ADDED
Binary file (4.65 kB).
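
The commit ships faiss_index.index and chunks.pkl as prebuilt artifacts but not the script that produced them. For context, a minimal sketch of how such artifacts could be generated, assuming a single source PDF and sentence-level chunking; the file name document.pdf, the use of nltk.sent_tokenize, and the IndexFlatL2 metric are all assumptions inferred from app.py and requirements.txt, not part of the commit:

# build_index.py - hypothetical offline step, not included in this commit
import pickle

import faiss
import nltk
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

PDF_FILE = "document.pdf"          # assumed input; the source PDF is not in the repo
INDEX_FILE = "faiss_index.index"   # must match app.py
CHUNKS_FILE = "chunks.pkl"         # must match app.py

nltk.download("punkt")

# Extract text from every page of the PDF
reader = PdfReader(PDF_FILE)
text = " ".join(page.extract_text() or "" for page in reader.pages)

# Chunk the text into sentences
chunks = nltk.sent_tokenize(text)

# Embed the chunks with the same model app.py uses for queries
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(chunks).astype("float32")

# Build a flat L2 index and persist both artifacts
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)
faiss.write_index(index, INDEX_FILE)
with open(CHUNKS_FILE, "wb") as f:
    pickle.dump(chunks, f)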
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
+streamlit
+faiss-cpu
+numpy
+sentence-transformers
+transformers
+PyPDF2
+nltk
+torch
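
Of these, PyPDF2 and nltk are not imported by app.py; they presumably support an offline indexing step like the sketch above, while torch backs the transformers and sentence-transformers models at runtime. With the dependencies installed, the app starts with streamlit run app.py.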