bainskarman committed on
Commit
7b666bb
·
verified ·
1 Parent(s): 6c00599

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -0
app.py CHANGED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.llms import HuggingFacePipeline
8
+ import torch
9
+ from transformers import pipeline
10
+
11
+ # Load a smaller LLM (e.g., Zephyr-7B or Mistral-7B)
12
@st.cache_resource
def load_llm():
    """Build the text-generation LLM wrapper used to answer questions.

    Creates a Hugging Face ``text-generation`` pipeline for the model named
    below (half precision, ``device_map="auto"``) and wraps it in a LangChain
    ``HuggingFacePipeline``.

    The function is memoised with ``st.cache_resource`` because Streamlit
    re-runs the whole script on every widget interaction; without the cache
    the model would be reloaded on each rerun.

    Returns:
        HuggingFacePipeline: LangChain LLM backed by the loaded pipeline.
    """
    model_name = "HuggingFaceH4/zephyr-7b-alpha"  # Replace with your preferred model
    pipe = pipeline(
        "text-generation",
        model=model_name,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    return HuggingFacePipeline(pipeline=pipe)
17
+
18
+ # Extract text from PDF
19
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Whatever ``PdfReader`` accepts. The caller in this file passes
            either a path string or a Streamlit upload object.

    Returns:
        str: Page texts joined with no separator; an empty string when no
        page yields any text.
    """
    reader = PdfReader(file)
    # `or ""` guards against a page whose extraction yields None/empty
    # (presumably image-only pages in some PyPDF2 releases -- TODO confirm),
    # which would otherwise make string concatenation raise TypeError.
    # join() also avoids building the result with repeated `+=`.
    return "".join(page.extract_text() or "" for page in reader.pages)
25
+
26
+ # Split text into chunks
27
def split_text(text, chunk_size=1000, chunk_overlap=200):
    """Break ``text`` into overlapping chunks for embedding.

    Args:
        text: The full document text.
        chunk_size: Passed through as the splitter's ``chunk_size``.
        chunk_overlap: Passed through as the splitter's ``chunk_overlap``.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_text``.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    ).split_text(text)
31
+
32
+ # Create embeddings and vector store
33
def create_vector_store(chunks):
    """Embed the text chunks and index them in a FAISS vector store.

    Args:
        chunks: The text chunks to index.

    Returns:
        The FAISS store built by ``FAISS.from_texts`` using the
        ``all-MiniLM-L6-v2`` Hugging Face embedding model.
    """
    embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return FAISS.from_texts(chunks, embedder)
37
+
38
+ # Query the PDF
39
def query_pdf(vector_store, query, llm):
    """Answer ``query`` from the indexed PDF and report the passages used.

    Builds a "stuff" ``RetrievalQA`` chain over ``vector_store`` and runs it
    with source documents enabled.

    Args:
        vector_store: Vector store exposing ``as_retriever()``.
        query: The user's question.
        llm: LangChain LLM used to generate the answer.

    Returns:
        dict: ``{"answer": <generated answer>, "source_text": <retrieved
        passages joined by blank lines>}``. The caller in this file reads
        exactly these two keys; a bare string (what ``qa.run`` returns)
        cannot be indexed that way.
    """
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
        # Needed so the retrieved passages come back alongside the answer.
        return_source_documents=True,
    )
    # `run()` only supports single-output chains, so call the chain with its
    # input mapping and read both output keys.
    output = qa.invoke({"query": query})
    source_text = "\n\n".join(
        doc.page_content for doc in output.get("source_documents", [])
    )
    return {"answer": output["result"], "source_text": source_text}
43
+
44
+ # Streamlit App
45
def main():
    """Run the "Chat with PDF" Streamlit page.

    Flow: choose a PDF (the upload, or ``default.pdf`` when nothing is
    uploaded), extract and chunk its text, index the chunks, load the LLM,
    then answer the question typed by the user.

    Streamlit re-executes this function on every widget interaction, so the
    vector store and the LLM are kept in ``st.session_state`` and only
    rebuilt when the selected PDF changes.
    """
    st.title("Chat with PDF")
    st.write("Upload a PDF and ask questions about it!")

    # File uploader
    uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
    if uploaded_file is None:
        st.info("Using default PDF.")
        uploaded_file = "default.pdf"  # Add a default PDF
        pdf_key = uploaded_file
    else:
        # Name + size identify the upload well enough to detect a new file.
        pdf_key = (uploaded_file.name, uploaded_file.size)

    # (Re)build the index only when the selected PDF changed.
    if st.session_state.get("pdf_key") != pdf_key:
        try:
            text = extract_text_from_pdf(uploaded_file)
        except FileNotFoundError:
            # Happens when nothing was uploaded and no default PDF is shipped.
            st.error("No PDF available. Please upload a PDF to continue.")
            return

        chunks = split_text(text)
        if not chunks:
            # Nothing to index (e.g. no extractable text); stop with a
            # message instead of failing while building the vector store.
            st.error("No extractable text was found in this PDF.")
            return

        st.session_state["vector_store"] = create_vector_store(chunks)
        st.session_state["pdf_key"] = pdf_key
    vector_store = st.session_state["vector_store"]

    # Load the LLM once per session rather than on every rerun.
    if "llm" not in st.session_state:
        st.session_state["llm"] = load_llm()
    llm = st.session_state["llm"]

    # Query translation options
    # NOTE(review): the selection is displayed but not yet used by the query
    # step below.
    query_method = st.selectbox(
        "Query Translation Method",
        ["Multi-Query", "RAG Fusion", "Decomposition", "Step Back", "HyDE"],
        help="Choose a method to improve query retrieval.",
    )

    # User input
    query = st.text_input("Ask a question about the PDF:")
    if query:
        result = query_pdf(vector_store, query, llm)
        # The query step may hand back a plain answer string (what
        # RetrievalQA.run produces) or a mapping with "answer" /
        # "source_text"; indexing a string by key would raise TypeError.
        if isinstance(result, dict):
            answer = result.get("answer", "")
            source_text = result.get("source_text")
        else:
            answer, source_text = result, None

        st.write("**Answer:**", answer)
        if source_text:
            st.write("**Source Text:**", source_text)
81
+
82
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()