SinhNguyen commited on
Commit
ea4a3d3
·
1 Parent(s): 9fb5230

Use Streamlit caching to remember model weights

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +30 -35
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Image To Text App
3
  emoji: 📹
4
  colorFrom: blue
5
  colorTo: red
 
1
  ---
2
+ title: PDF Buddy
3
  emoji: 📹
4
  colorFrom: blue
5
  colorTo: red
app.py CHANGED
@@ -10,32 +10,6 @@ from langchain.llms import HuggingFaceHub
10
  import os
11
  from dotenv import load_dotenv
12
 
13
- # Load the model and store it as a global variable
14
- model_name = "hkunlp/instructor-xl"
15
- model_kwargs = {'device': 'cpu'}
16
- embeddings = HuggingFaceInstructEmbeddings(
17
- model_name=model_name, model_kwargs=model_kwargs
18
- )
19
-
20
- # Load the conversation chain and store it as a global variable
21
- llm = HuggingFaceHub(
22
- repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 218}
23
- )
24
-
25
- # Set the Streamlit page configuration and CSS styles
26
- st.set_page_config(page_title="PDF Buddy", page_icon=":coffee:")
27
- st.markdown(
28
- """
29
- <style>
30
- body {
31
- background-color: #fce6ef;
32
- }
33
- </style>
34
- """,
35
- unsafe_allow_html=True
36
- )
37
- st.write(css, unsafe_allow_html=True)
38
-
39
 
40
  def get_pdf_text(pdf_docs):
41
  text = ""
@@ -56,16 +30,25 @@ def get_text_chunks(text):
56
  chunks = text_splitter.split_text(text)
57
  return chunks
58
 
59
-
60
  def get_vectorstore(text_chunks):
 
 
 
 
 
 
 
61
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
62
  return vectorstore
63
 
64
-
65
  def get_conversation_chain(vectorstore):
 
 
 
66
  memory = ConversationBufferMemory(
67
- memory_key='chat_history', return_messages=True
68
- )
69
  conversation_chain = ConversationalRetrievalChain.from_llm(
70
  llm=llm,
71
  retriever=vectorstore.as_retriever(),
@@ -88,6 +71,20 @@ def handle_userinput(user_question):
88
 
89
 
90
  def main():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  if "conversation" not in st.session_state:
92
  st.session_state.conversation = None
93
  if "chat_history" not in st.session_state:
@@ -101,8 +98,7 @@ def main():
101
  with st.sidebar:
102
  st.subheader("Your documents")
103
  pdf_docs = st.file_uploader(
104
- "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
105
- )
106
  if st.button("Process"):
107
  with st.spinner("Processing"):
108
  # get pdf text
@@ -116,9 +112,8 @@ def main():
116
 
117
  # create conversation chain
118
  st.session_state.conversation = get_conversation_chain(
119
- vectorstore
120
- )
121
 
122
 
123
  if __name__ == '__main__':
124
- main()
 
10
  import os
11
  from dotenv import load_dotenv
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  def get_pdf_text(pdf_docs):
15
  text = ""
 
30
  chunks = text_splitter.split_text(text)
31
  return chunks
32
 
33
# allow_output_mutation=True: the returned FAISS store is mutable and not
# reliably hashable, so without it bare @st.cache warns/fails when it tries
# to hash the cached output on later runs.
# NOTE(review): st.cache is deprecated in newer Streamlit releases in favor
# of st.cache_resource — confirm the installed version before migrating.
@st.cache(allow_output_mutation=True)
def get_vectorstore(text_chunks):
    """Build a FAISS vector store from the given text chunks.

    Embeds each chunk with the "hkunlp/instructor-xl" instructor model on
    CPU, then indexes the embeddings in an in-memory FAISS store. Cached by
    Streamlit so the embedding model weights are loaded only once per
    distinct input.

    Args:
        text_chunks: list of text strings to embed and index.

    Returns:
        A FAISS vector store over the embedded chunks.
    """
    embeddings = HuggingFaceInstructEmbeddings(
        model_name="hkunlp/instructor-xl",
        model_kwargs={'device': 'cpu'},
    )
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    return vectorstore
44
 
45
+ @st.cache
46
  def get_conversation_chain(vectorstore):
47
+ # llm = ChatOpenAI()
48
+ llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":218})
49
+
50
  memory = ConversationBufferMemory(
51
+ memory_key='chat_history', return_messages=True)
 
52
  conversation_chain = ConversationalRetrievalChain.from_llm(
53
  llm=llm,
54
  retriever=vectorstore.as_retriever(),
 
71
 
72
 
73
  def main():
74
+ load_dotenv()
75
+ st.set_page_config(page_title="PDF Buddy", page_icon=":coffee:")
76
+ st.markdown(
77
+ """
78
+ <style>
79
+ body {
80
+ background-color: #fce6ef;
81
+ }
82
+ </style>
83
+ """,
84
+ unsafe_allow_html=True
85
+ )
86
+ st.write(css, unsafe_allow_html=True)
87
+
88
  if "conversation" not in st.session_state:
89
  st.session_state.conversation = None
90
  if "chat_history" not in st.session_state:
 
98
  with st.sidebar:
99
  st.subheader("Your documents")
100
  pdf_docs = st.file_uploader(
101
+ "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
 
102
  if st.button("Process"):
103
  with st.spinner("Processing"):
104
  # get pdf text
 
112
 
113
  # create conversation chain
114
  st.session_state.conversation = get_conversation_chain(
115
+ vectorstore)
 
116
 
117
 
118
  if __name__ == '__main__':
119
+ main()