Spaces:
Sleeping
Sleeping
Commit
·
ea4a3d3
1
Parent(s):
9fb5230
using streamlit caches to remember model weights
Browse files
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: 📹
|
4 |
colorFrom: blue
|
5 |
colorTo: red
|
|
|
1 |
---
|
2 |
+
title: PDF Buddie
|
3 |
emoji: 📹
|
4 |
colorFrom: blue
|
5 |
colorTo: red
|
app.py
CHANGED
@@ -10,32 +10,6 @@ from langchain.llms import HuggingFaceHub
|
|
10 |
import os
|
11 |
from dotenv import load_dotenv
|
12 |
|
13 |
-
# Load the model and store it as a global variable
|
14 |
-
model_name = "hkunlp/instructor-xl"
|
15 |
-
model_kwargs = {'device': 'cpu'}
|
16 |
-
embeddings = HuggingFaceInstructEmbeddings(
|
17 |
-
model_name=model_name, model_kwargs=model_kwargs
|
18 |
-
)
|
19 |
-
|
20 |
-
# Load the conversation chain and store it as a global variable
|
21 |
-
llm = HuggingFaceHub(
|
22 |
-
repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 218}
|
23 |
-
)
|
24 |
-
|
25 |
-
# Set the Streamlit page configuration and CSS styles
|
26 |
-
st.set_page_config(page_title="PDF Buddy", page_icon=":coffee:")
|
27 |
-
st.markdown(
|
28 |
-
"""
|
29 |
-
<style>
|
30 |
-
body {
|
31 |
-
background-color: #fce6ef;
|
32 |
-
}
|
33 |
-
</style>
|
34 |
-
""",
|
35 |
-
unsafe_allow_html=True
|
36 |
-
)
|
37 |
-
st.write(css, unsafe_allow_html=True)
|
38 |
-
|
39 |
|
40 |
def get_pdf_text(pdf_docs):
|
41 |
text = ""
|
@@ -56,16 +30,25 @@ def get_text_chunks(text):
|
|
56 |
chunks = text_splitter.split_text(text)
|
57 |
return chunks
|
58 |
|
59 |
-
|
60 |
def get_vectorstore(text_chunks):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
62 |
return vectorstore
|
63 |
|
64 |
-
|
65 |
def get_conversation_chain(vectorstore):
|
|
|
|
|
|
|
66 |
memory = ConversationBufferMemory(
|
67 |
-
memory_key='chat_history', return_messages=True
|
68 |
-
)
|
69 |
conversation_chain = ConversationalRetrievalChain.from_llm(
|
70 |
llm=llm,
|
71 |
retriever=vectorstore.as_retriever(),
|
@@ -88,6 +71,20 @@ def handle_userinput(user_question):
|
|
88 |
|
89 |
|
90 |
def main():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
if "conversation" not in st.session_state:
|
92 |
st.session_state.conversation = None
|
93 |
if "chat_history" not in st.session_state:
|
@@ -101,8 +98,7 @@ def main():
|
|
101 |
with st.sidebar:
|
102 |
st.subheader("Your documents")
|
103 |
pdf_docs = st.file_uploader(
|
104 |
-
"Upload your PDFs here and click on 'Process'", accept_multiple_files=True
|
105 |
-
)
|
106 |
if st.button("Process"):
|
107 |
with st.spinner("Processing"):
|
108 |
# get pdf text
|
@@ -116,9 +112,8 @@ def main():
|
|
116 |
|
117 |
# create conversation chain
|
118 |
st.session_state.conversation = get_conversation_chain(
|
119 |
-
vectorstore
|
120 |
-
)
|
121 |
|
122 |
|
123 |
if __name__ == '__main__':
|
124 |
-
main()
|
|
|
10 |
import os
|
11 |
from dotenv import load_dotenv
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
def get_pdf_text(pdf_docs):
|
15 |
text = ""
|
|
|
30 |
chunks = text_splitter.split_text(text)
|
31 |
return chunks
|
32 |
|
33 |
+
@st.cache
|
34 |
def get_vectorstore(text_chunks):
|
35 |
+
# embeddings = OpenAIEmbeddings()
|
36 |
+
print("HAHA")
|
37 |
+
model_name = "hkunlp/instructor-xl"
|
38 |
+
model_kwargs = {'device': 'cpu'}
|
39 |
+
embeddings = HuggingFaceInstructEmbeddings(
|
40 |
+
model_name=model_name, model_kwargs=model_kwargs)
|
41 |
+
print("HAHA")
|
42 |
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
43 |
return vectorstore
|
44 |
|
45 |
+
@st.cache
|
46 |
def get_conversation_chain(vectorstore):
|
47 |
+
# llm = ChatOpenAI()
|
48 |
+
llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":218})
|
49 |
+
|
50 |
memory = ConversationBufferMemory(
|
51 |
+
memory_key='chat_history', return_messages=True)
|
|
|
52 |
conversation_chain = ConversationalRetrievalChain.from_llm(
|
53 |
llm=llm,
|
54 |
retriever=vectorstore.as_retriever(),
|
|
|
71 |
|
72 |
|
73 |
def main():
|
74 |
+
load_dotenv()
|
75 |
+
st.set_page_config(page_title="PDF Buddy", page_icon=":coffee:")
|
76 |
+
st.markdown(
|
77 |
+
"""
|
78 |
+
<style>
|
79 |
+
body {
|
80 |
+
background-color: #fce6ef;
|
81 |
+
}
|
82 |
+
</style>
|
83 |
+
""",
|
84 |
+
unsafe_allow_html=True
|
85 |
+
)
|
86 |
+
st.write(css, unsafe_allow_html=True)
|
87 |
+
|
88 |
if "conversation" not in st.session_state:
|
89 |
st.session_state.conversation = None
|
90 |
if "chat_history" not in st.session_state:
|
|
|
98 |
with st.sidebar:
|
99 |
st.subheader("Your documents")
|
100 |
pdf_docs = st.file_uploader(
|
101 |
+
"Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
|
|
|
102 |
if st.button("Process"):
|
103 |
with st.spinner("Processing"):
|
104 |
# get pdf text
|
|
|
112 |
|
113 |
# create conversation chain
|
114 |
st.session_state.conversation = get_conversation_chain(
|
115 |
+
vectorstore)
|
|
|
116 |
|
117 |
|
118 |
if __name__ == '__main__':
|
119 |
+
main()
|