Spaces:

SinhNguyen
/

pdf_buddy

Sleeping

App Files Files Community

SinhNguyen committed on Jul 5, 2023

Commit

ea4a3d3

1 Parent(s): 9fb5230

using streamlit caches to remember model weights

Browse files

Files changed (2) hide show

README.md +1 -1
app.py +30 -35

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Image To Text App
 emoji: 📹
 colorFrom: blue
 colorTo: red

 ---
+title: PDF Buddie
 emoji: 📹
 colorFrom: blue
 colorTo: red

app.py CHANGED Viewed

@@ -10,32 +10,6 @@ from langchain.llms import HuggingFaceHub
 import os
 from dotenv import load_dotenv
-# Load the model and store it as a global variable
-model_name = "hkunlp/instructor-xl"
-model_kwargs = {'device': 'cpu'}
-embeddings = HuggingFaceInstructEmbeddings(
-    model_name=model_name, model_kwargs=model_kwargs
-)
-# Load the conversation chain and store it as a global variable
-llm = HuggingFaceHub(
-    repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 218}
-)
-# Set the Streamlit page configuration and CSS styles
-st.set_page_config(page_title="PDF Buddy", page_icon=":coffee:")
-st.markdown(
-    """
-    <style>
-    body {
-        background-color: #fce6ef;
-    }
-    </style>
-    """,
-    unsafe_allow_html=True
-)
-st.write(css, unsafe_allow_html=True)
 def get_pdf_text(pdf_docs):
     text = ""
@@ -56,16 +30,25 @@ def get_text_chunks(text):
     chunks = text_splitter.split_text(text)
     return chunks
 def get_vectorstore(text_chunks):
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
 def get_conversation_chain(vectorstore):
     memory = ConversationBufferMemory(
-        memory_key='chat_history', return_messages=True
-    )
     conversation_chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
         retriever=vectorstore.as_retriever(),
@@ -88,6 +71,20 @@ def handle_userinput(user_question):
 def main():
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
@@ -101,8 +98,7 @@ def main():
     with st.sidebar:
         st.subheader("Your documents")
         pdf_docs = st.file_uploader(
-            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
-        )
         if st.button("Process"):
             with st.spinner("Processing"):
                 # get pdf text
@@ -116,9 +112,8 @@ def main():
                 # create conversation chain
                 st.session_state.conversation = get_conversation_chain(
-                    vectorstore
-                )
 if __name__ == '__main__':
-    main()

 import os
 from dotenv import load_dotenv
 def get_pdf_text(pdf_docs):
     text = ""
     chunks = text_splitter.split_text(text)
     return chunks
+@st.cache
 def get_vectorstore(text_chunks):
+    # embeddings = OpenAIEmbeddings()
+    print("HAHA")
+    model_name = "hkunlp/instructor-xl"
+    model_kwargs = {'device': 'cpu'}
+    embeddings  = HuggingFaceInstructEmbeddings(
+                model_name=model_name, model_kwargs=model_kwargs)
+    print("HAHA")
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
+@st.cache
 def get_conversation_chain(vectorstore):
+    # llm = ChatOpenAI()
+    llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":218})
     memory = ConversationBufferMemory(
+        memory_key='chat_history', return_messages=True)
     conversation_chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
         retriever=vectorstore.as_retriever(),
 def main():
+    load_dotenv()
+    st.set_page_config(page_title="PDF Buddy", page_icon=":coffee:")
+    st.markdown(
+        """
+        <style>
+        body {
+            background-color: #fce6ef;
+        }
+        </style>
+        """,
+        unsafe_allow_html=True
+    )
+    st.write(css, unsafe_allow_html=True)
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
     with st.sidebar:
         st.subheader("Your documents")
         pdf_docs = st.file_uploader(
+            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
         if st.button("Process"):
             with st.spinner("Processing"):
                 # get pdf text
                 # create conversation chain
                 st.session_state.conversation = get_conversation_chain(
+                    vectorstore)
 if __name__ == '__main__':
+    main()