DrishtiSharma committed on
Commit
c2d2534
·
verified ·
1 Parent(s): 515dae6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -26
app.py CHANGED
@@ -31,10 +31,14 @@ def check_poppler_installed():
31
  check_poppler_installed()
32
 
33
  def load_docs(document_path):
34
- loader = UnstructuredPDFLoader(document_path)
35
- documents = loader.load()
36
- text_splitter = NLTKTextSplitter(chunk_size=1000)
37
- return text_splitter.split_documents(documents)
 
 
 
 
38
 
39
  def already_indexed(vectordb, file_name):
40
  indexed_sources = set(
@@ -54,7 +58,7 @@ def load_chain(file_name=None):
54
  else:
55
  vectordb.delete_collection()
56
  docs = load_docs(file_name)
57
- st.write("Length: ", len(docs))
58
 
59
  vectordb = Chroma.from_documents(
60
  docs, HuggingFaceEmbeddings(), persist_directory=PERSISTED_DIRECTORY
@@ -81,9 +85,13 @@ def extract_patent_number(url):
81
  return match.group(1) if match else None
82
 
83
  def download_pdf(patent_number):
84
- patent_downloader = PatentDownloader()
85
- patent_downloader.download(patent=patent_number)
86
- return f"{patent_number}.pdf"
 
 
 
 
87
 
88
  if __name__ == "__main__":
89
  st.set_page_config(
@@ -93,57 +101,67 @@ if __name__ == "__main__":
93
  initial_sidebar_state="expanded",
94
  )
95
  st.header("📖 Patent Chat: Google Patents Chat Demo")
96
-
97
  # Allow user to input the Google patent link
98
  patent_link = st.text_input("Enter Google Patent Link:", key="PATENT_LINK")
99
-
100
  if not patent_link:
101
  st.warning("Please enter a Google patent link to proceed.")
102
  st.stop()
103
- else:
104
- st.session_state["patent_link_configured"] = True
105
 
106
  patent_number = extract_patent_number(patent_link)
107
  if not patent_number:
108
  st.error("Invalid patent link format. Please provide a valid Google patent link.")
109
  st.stop()
110
 
111
- st.write("Patent number: ", patent_number)
112
 
 
113
  pdf_path = f"{patent_number}.pdf"
114
  if os.path.isfile(pdf_path):
115
- st.write("File already downloaded.")
116
  else:
117
- st.write("Downloading patent file...")
118
  pdf_path = download_pdf(patent_number)
119
- st.write("File downloaded.")
120
 
 
 
121
  chain = load_chain(pdf_path)
 
122
 
 
123
  if "messages" not in st.session_state:
124
  st.session_state["messages"] = [
125
- {"role": "assistant", "content": "How can I help you?"}
126
  ]
127
 
 
128
  for message in st.session_state.messages:
129
  with st.chat_message(message["role"]):
130
  st.markdown(message["content"])
131
 
 
132
  if user_input := st.chat_input("What is your question?"):
133
  st.session_state.messages.append({"role": "user", "content": user_input})
134
  with st.chat_message("user"):
135
  st.markdown(user_input)
136
 
 
137
  with st.chat_message("assistant"):
138
  message_placeholder = st.empty()
139
  full_response = ""
140
 
141
- with st.spinner("CHAT-BOT is at Work ..."):
142
- assistant_response = chain({"question": user_input})
143
- for chunk in assistant_response["answer"].split():
144
- full_response += chunk + " "
145
- time.sleep(0.05)
146
- message_placeholder.markdown(full_response + "▌")
147
- st.session_state.messages.append(
148
- {"role": "assistant", "content": full_response}
149
- )
 
 
 
 
 
31
  check_poppler_installed()
32
 
33
  def load_docs(document_path):
34
+ try:
35
+ loader = UnstructuredPDFLoader(document_path)
36
+ documents = loader.load()
37
+ text_splitter = NLTKTextSplitter(chunk_size=1000)
38
+ return text_splitter.split_documents(documents)
39
+ except Exception as e:
40
+ st.error(f"Failed to load and process PDF: {e}")
41
+ st.stop()
42
 
43
  def already_indexed(vectordb, file_name):
44
  indexed_sources = set(
 
58
  else:
59
  vectordb.delete_collection()
60
  docs = load_docs(file_name)
61
+ st.write("Length of Documents: ", len(docs))
62
 
63
  vectordb = Chroma.from_documents(
64
  docs, HuggingFaceEmbeddings(), persist_directory=PERSISTED_DIRECTORY
 
85
  return match.group(1) if match else None
86
 
87
  def download_pdf(patent_number):
88
+ try:
89
+ patent_downloader = PatentDownloader(verbose=True)
90
+ output_path = patent_downloader.download(patents=patent_number)
91
+ return output_path[0] # Return the first file path
92
+ except Exception as e:
93
+ st.error(f"Failed to download patent PDF: {e}")
94
+ st.stop()
95
 
96
  if __name__ == "__main__":
97
  st.set_page_config(
 
101
  initial_sidebar_state="expanded",
102
  )
103
  st.header("📖 Patent Chat: Google Patents Chat Demo")
104
+
105
  # Allow user to input the Google patent link
106
  patent_link = st.text_input("Enter Google Patent Link:", key="PATENT_LINK")
107
+
108
  if not patent_link:
109
  st.warning("Please enter a Google patent link to proceed.")
110
  st.stop()
 
 
111
 
112
  patent_number = extract_patent_number(patent_link)
113
  if not patent_number:
114
  st.error("Invalid patent link format. Please provide a valid Google patent link.")
115
  st.stop()
116
 
117
+ st.write(f"Patent number: **{patent_number}**")
118
 
119
+ # Download the PDF file
120
  pdf_path = f"{patent_number}.pdf"
121
  if os.path.isfile(pdf_path):
122
+ st.write("✅ File already downloaded.")
123
  else:
124
+ st.write("📥 Downloading patent file...")
125
  pdf_path = download_pdf(patent_number)
126
+ st.write(f"✅ File downloaded: {pdf_path}")
127
 
128
+ # Load the conversational chain
129
+ st.write("🔄 Loading document into the system...")
130
  chain = load_chain(pdf_path)
131
+ st.success("Document successfully loaded! You can now start asking questions.")
132
 
133
+ # Initialize the chat
134
  if "messages" not in st.session_state:
135
  st.session_state["messages"] = [
136
+ {"role": "assistant", "content": "Hello! How can I assist you with this patent?"}
137
  ]
138
 
139
+ # Display chat history
140
  for message in st.session_state.messages:
141
  with st.chat_message(message["role"]):
142
  st.markdown(message["content"])
143
 
144
+ # User input
145
  if user_input := st.chat_input("What is your question?"):
146
  st.session_state.messages.append({"role": "user", "content": user_input})
147
  with st.chat_message("user"):
148
  st.markdown(user_input)
149
 
150
+ # Generate assistant response
151
  with st.chat_message("assistant"):
152
  message_placeholder = st.empty()
153
  full_response = ""
154
 
155
+ with st.spinner("Generating response..."):
156
+ try:
157
+ assistant_response = chain({"question": user_input})
158
+ for chunk in assistant_response["answer"].split():
159
+ full_response += chunk + " "
160
+ time.sleep(0.05) # Simulate typing effect
161
+ message_placeholder.markdown(full_response + "▌")
162
+ except Exception as e:
163
+ full_response = f"An error occurred: {e}"
164
+ finally:
165
+ message_placeholder.markdown(full_response)
166
+
167
+ st.session_state.messages.append({"role": "assistant", "content": full_response})