Silence1412 committed on
Commit
d55b3c4
·
1 Parent(s): 0fd442f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import CharacterTextSplitter
4
+ from langchain.embeddings.openai import OpenAIEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.chains.question_answering import load_qa_chain
7
+ from langchain.llms import OpenAI
8
+ from langchain.callbacks import get_openai_callback
9
+ import os
10
+ import openai
11
+ from streamlit_chat import message
12
+
13
+
14
def main():
    """Run the Streamlit "ask your PDF" chat page.

    Flow: read an OpenAI API key, accept a PDF upload, extract and chunk its
    text, embed the chunks into a FAISS index, then answer the user's
    question with a "stuff" question-answering chain. Questions and answers
    are kept in ``st.session_state`` and rendered newest-first.

    Returns:
        None. All output is rendered through Streamlit widgets.
    """
    # Mask the key in the UI so it is not displayed in clear text.
    api_key = st.text_input("Input your OpenAI API key", "", type="password")
    if api_key:
        # Only overwrite the environment when the user actually typed a key,
        # so a key already present in the environment is not blanked out.
        os.environ["OPENAI_API_KEY"] = api_key
    # st.header("Ask your PDF 💬")

    # upload file
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        return

    # Without a key the embedding / LLM clients below cannot be created.
    if not os.environ.get("OPENAI_API_KEY"):
        st.warning("Please enter your OpenAI API key to continue.")
        return

    # extract the text; a page may yield no text, so fall back to "".
    pdf_reader = PdfReader(pdf)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # split into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # Nothing to index (e.g. a scanned, image-only PDF).
        st.warning("No extractable text was found in this PDF.")
        return

    # create embeddings
    # NOTE(review): this index is rebuilt on every execution of main();
    # consider caching it per uploaded file if that proves costly.
    embeddings = OpenAIEmbeddings()
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    # Chat history: parallel lists of answers and the questions that
    # produced them.
    if 'generated' not in st.session_state:
        st.session_state['generated'] = []
    if 'past' not in st.session_state:
        st.session_state['past'] = []

    # show user input
    user_question = st.text_input("Ask a question about your PDF:")
    if user_question:
        docs = knowledge_base.similarity_search(user_question)

        llm = OpenAI()
        chain = load_qa_chain(llm, chain_type="stuff")
        with get_openai_callback() as cb:
            response = chain.run(input_documents=docs, question=user_question)
            # Usage summary collected by the callback goes to stdout.
            print(cb)

        # st.write(response)
        st.session_state.past.append(user_question)
        st.session_state.generated.append(response)

    # Render the conversation newest-first; keys keep each chat widget unique.
    generated = st.session_state['generated']
    if generated:
        for i in reversed(range(len(generated))):
            message(generated[i], key=str(i))
            message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')
66
+
67
+
68
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()