ProjektseminarChatbot committed on
Commit
380829c
·
1 Parent(s): 3f1529b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import os
3
+ import streamlit as st
4
+ from PyPDF2 import PdfReader
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
7
+ from langchain.vectorstores import FAISS #facebook AI similarity search
8
+ from langchain.chains.question_answering import load_qa_chain
9
+ from langchain import HuggingFaceHub
10
+
11
+
12
def _read_pdf_text(pdf):
    """Return the concatenated text of every page of the uploaded PDF."""
    reader = PdfReader(pdf)
    # A page without extractable text (e.g. a scanned image) must not break
    # the concatenation, so fall back to an empty string for it.
    return "".join(page.extract_text() or "" for page in reader.pages)


def main():
    """Run the Streamlit "Ask Your PDF" page.

    Flow: upload a PDF, extract its text, split the text into overlapping
    chunks, embed the chunks into a FAISS index, then answer the user's
    question with a question-answering chain fed by the most similar chunks.

    Returns early (rendering nothing further) when no PDF has been uploaded,
    when the PDF contains no extractable text, or when no question has been
    entered yet.
    """
    # Load variables from a local .env file.
    # NOTE(review): presumably this supplies the Hugging Face Hub API token
    # used by HuggingFaceHub below -- confirm.
    load_dotenv()
    st.set_page_config(page_title="Ask your PDF")
    st.header("Ask Your PDF")

    pdf = st.file_uploader("Upload your pdf", type="pdf")
    if pdf is None:
        return

    text = _read_pdf_text(pdf)

    # Split into chunks.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # Building a vector index from zero texts fails, so stop here with a
        # clear message instead of an exception in the UI.
        st.warning("No extractable text was found in this PDF.")
        return

    # Create embeddings and index the chunks for similarity search.
    embeddings = HuggingFaceEmbeddings()
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    user_question = st.text_input("Ask Question about your PDF:")
    if not user_question:
        return

    docs = knowledge_base.similarity_search(user_question)
    # NOTE(review): temperature=5 is unusually high for question answering
    # and may produce erratic answers -- confirm the intended value.
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-large",
        model_kwargs={"temperature": 5, "max_length": 64},
    )
    chain = load_qa_chain(llm, chain_type="stuff")
    response = chain.run(input_documents=docs, question=user_question)

    st.write(response)
48
+
49
+
50
+
51
+ # st.write(chunks)
52
+
53
# Invoke main() only when this file is run as the entry script.
if __name__ == "__main__":
    main()