tdurzynski committed on
Commit
9ca2091
·
verified ·
1 Parent(s): 6f98b16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -131
app.py CHANGED
@@ -1,19 +1,17 @@
1
  import os
2
  import logging
3
  import gradio as gr
 
4
  from dotenv import load_dotenv
5
- from langchain.document_loaders import ArxivLoader, PyPDFLoader
6
  from langchain.text_splitter import TokenTextSplitter
7
  from langchain.vectorstores import Chroma
8
  from langchain_community.embeddings import HuggingFaceHubEmbeddings
9
- from langchain.chains import RetrievalQA
10
- from langchain.chains.summarize import load_summarize_chain
11
  from langchain_groq import ChatGroq
12
- from transformers import pipeline
13
  from PyPDF2 import PdfReader
14
  from huggingface_hub import login
15
  from groq import AsyncGroq, Groq
16
- import asyncio
17
 
18
  # Load environment variables
19
  load_dotenv()
@@ -34,92 +32,17 @@ login(HUGGING_API_KEY)
34
  # Load models and embeddings
35
  embedding_model = HuggingFaceHubEmbeddings(huggingfacehub_api_token=HUGGING_API_KEY)
36
  llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)
37
-
38
- def display_results(result):
39
- """Format and display results properly."""
40
- return "\n".join(result)
41
-
42
- def summarize_text(text):
43
- """Summarize text using the Groq API."""
44
- try:
45
- sum_client = Groq(api_key=GROQ_API_KEY)
46
- messages = [
47
- {"role": "system", "content": "You are an excellent analyst who excels in summarization task. If I give you the whole text, you should summarize it."},
48
- {"role": "user", "content": f"Summarize the paper: {text}"}
49
- ]
50
-
51
- response = sum_client.chat.completions.create(
52
- messages=messages,
53
- model="llama3-70b-8192",
54
- temperature=0,
55
- max_tokens=8192,
56
- top_p=1,
57
- )
58
- return response.choices[0].message.content
59
-
60
- except Exception as e:
61
- logger.error(f"Error summarizing text: {e}")
62
- return "Error in summarization."
63
-
64
- def summarize_pdf(pdf_file_path, max_length):
65
- """Extract text from a PDF and summarize it."""
66
- try:
67
- reader = PdfReader(pdf_file_path)
68
- text = "\n".join(page.extract_text() or "" for page in reader.pages)
69
-
70
- text_splitter = TokenTextSplitter(chunk_size=8192, chunk_overlap=1000)
71
- chunks = text_splitter.split_text(text)
72
-
73
- summary = ""
74
- for chunk in chunks:
75
- summary += summarize_text(chunk)
76
-
77
- return summary
78
-
79
- except Exception as e:
80
- logger.error(f"Error summarizing PDF: {e}")
81
- return "Failed to process the PDF."
82
-
83
- def summarize_arxiv_pdf(query):
84
- """Summarize an arXiv paper given a query."""
85
- try:
86
- loader = ArxivLoader(query=query, load_max_docs=10)
87
- documents = loader.load()
88
- text_splitter = TokenTextSplitter(chunk_size=5700, chunk_overlap=100)
89
- chunks = text_splitter.split_documents(documents)
90
-
91
- ref_summary = ""
92
- for chunk in chunks:
93
- ref_summary += summarize_text(chunk.page_content)
94
-
95
- arxiv_summary = loader.get_summaries_as_docs()
96
-
97
- summaries = []
98
- for doc in arxiv_summary:
99
- title = doc.metadata.get("Title", "Unknown Title")
100
- authors = doc.metadata.get("Authors", "Unknown Authors")
101
- url = doc.metadata.get("Entry ID", "No URL")
102
-
103
- summaries.append(f"**{title}**\n")
104
- summaries.append(f"**Authors:** {authors}\n")
105
- summaries.append(f"**View full paper:** [Link to paper]({url})\n")
106
- summaries.append(f"**Summary:** {doc.page_content}\n")
107
- summaries.append(f"**Enhanced Summary:**\n {ref_summary}")
108
-
109
- return display_results(summaries)
110
-
111
- except Exception as e:
112
- logger.error(f"Error summarizing arXiv paper: {e}")
113
- return "Failed to process arXiv paper."
114
-
115
  client = AsyncGroq(api_key=GROQ_API_KEY)
116
 
 
 
 
117
  async def chat_with_replit(message, history):
118
- """Chat functionality using Groq API."""
119
  try:
120
  messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
121
 
122
- for chat in history:
123
  user_msg, assistant_msg = chat
124
  messages.append({"role": "user", "content": user_msg})
125
  messages.append({"role": "assistant", "content": assistant_msg})
@@ -132,7 +55,7 @@ async def chat_with_replit(message, history):
132
  temperature=0,
133
  max_tokens=1024,
134
  top_p=1,
135
- stream=False, # Using non-streaming for simplicity in this integration.
136
  )
137
  return response.choices[0].message.content
138
 
@@ -140,13 +63,24 @@ async def chat_with_replit(message, history):
140
  logger.error(f"Chat error: {e}")
141
  return "Error in chat response."
142
 
143
- async def chat_with_replit_pdf(message, history, doi_num):
144
- """Chat with arXiv papers using document retrieval."""
 
 
 
 
 
 
 
145
  try:
 
146
  loader = ArxivLoader(query=str(doi_num), load_max_docs=10)
147
  documents = loader.load_and_split()
 
 
148
  metadata = documents[0].metadata
149
 
 
150
  vector_store = Chroma.from_documents(documents, embedding_model)
151
 
152
  def retrieve_relevant_content(user_query):
@@ -173,63 +107,145 @@ async def chat_with_replit_pdf(message, history, doi_num):
173
  return response.choices[0].message.content
174
 
175
  except Exception as e:
176
- logger.error(f"Error in chat with PDF: {e}")
177
- return "Error processing chat with PDF."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- # Define a synchronous wrapper for the async chat function
180
- def chat_with_replit_sync(message, history):
181
- return asyncio.run(chat_with_replit(message, history))
 
 
 
182
 
183
- # Gradio UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  with gr.Blocks() as app:
185
- # Tab for Local PDF Summarization
186
- with gr.Tab(label="Local PDF Summarization"):
187
- with gr.Row():
188
- input_pdf = gr.File(label="Upload PDF file")
189
- max_length_slider = gr.Slider(512, 4096, value=2048, step=512, label="Max Length")
190
- summarize_pdf_btn = gr.Button(value="Summarize PDF")
191
- with gr.Row():
192
- output_pdf_summary = gr.Markdown(label="Summary", height=1000)
193
- summarize_pdf_btn.click(summarize_pdf, inputs=[input_pdf, max_length_slider], outputs=output_pdf_summary)
194
-
195
- # Tab for Arxiv Summarization
196
- with gr.Tab(label="Arxiv Summarization"):
197
- with gr.Column():
198
- arxiv_number = gr.Textbox(label="Enter arXiv number, i.e 2502.02523")
199
- summarize_btn = gr.Button(value="Summarize arXiv Paper")
200
- with gr.Column():
201
- output_summary = gr.Markdown(label="Summary", height=1000)
202
- summarize_btn.click(summarize_arxiv_pdf, inputs=arxiv_number, outputs=output_summary)
203
-
204
- # New Tab for Chat functionality
205
- with gr.Tab(label="Chat with Assistant"):
206
  gr.Markdown("### Chat with the Assistant")
207
  with gr.Row():
208
- chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
209
- send_button = gr.Button("Send")
210
- # A Markdown to display the conversation history (or you could use gr.Chatbot)
211
- chat_output = gr.Markdown(label="Chat Output", height=300)
212
- # Maintain chat history as a list of [user, assistant] pairs
213
- chat_history = gr.State([])
214
-
215
- # When the send button is clicked, update the chat history and get a response.
216
- def update_chat(user_message, history):
217
- # Append the new user message to history with an empty assistant response for now.
218
  history = history or []
219
  history.append([user_message, ""])
220
  return history, history
221
 
222
- def update_assistant_response(history):
223
- # Get the last user message and call the chat function
224
  user_message = history[-1][0]
225
  response = chat_with_replit_sync(user_message, history[:-1])
226
- # Update the last entry with the assistant's response
227
  history[-1][1] = response
228
- # Format the conversation for display
229
  formatted = "\n\n".join([f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history])
230
  return history, formatted
231
 
232
- send_button.click(update_chat, inputs=[chat_input, chat_history], outputs=[chat_history, chat_output])
233
- send_button.click(update_assistant_response, inputs=chat_history, outputs=[chat_history, chat_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
  app.launch()
 
 
1
  import os
2
  import logging
3
  import gradio as gr
4
+ import asyncio
5
  from dotenv import load_dotenv
6
+ from langchain.document_loaders import ArxivLoader
7
  from langchain.text_splitter import TokenTextSplitter
8
  from langchain.vectorstores import Chroma
9
  from langchain_community.embeddings import HuggingFaceHubEmbeddings
 
 
10
  from langchain_groq import ChatGroq
 
11
  from PyPDF2 import PdfReader
12
  from huggingface_hub import login
13
  from groq import AsyncGroq, Groq
14
+ from langchain.docstore.document import Document # For creating a document from PDF text
15
 
16
  # Load environment variables
17
  load_dotenv()
 
32
  # Load models and embeddings
33
  embedding_model = HuggingFaceHubEmbeddings(huggingfacehub_api_token=HUGGING_API_KEY)
34
  llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  client = AsyncGroq(api_key=GROQ_API_KEY)
36
 
37
+ # -----------------------------
38
+ # Chat Functionality (General)
39
+ # -----------------------------
40
  async def chat_with_replit(message, history):
41
+ """General chat functionality using the Groq API."""
42
  try:
43
  messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
44
 
45
+ for chat in history or []:
46
  user_msg, assistant_msg = chat
47
  messages.append({"role": "user", "content": user_msg})
48
  messages.append({"role": "assistant", "content": assistant_msg})
 
55
  temperature=0,
56
  max_tokens=1024,
57
  top_p=1,
58
+ stream=False, # For simplicity we are not streaming
59
  )
60
  return response.choices[0].message.content
61
 
 
63
  logger.error(f"Chat error: {e}")
64
  return "Error in chat response."
65
 
66
+ def chat_with_replit_sync(message, history):
67
+ """Synchronous wrapper for general chat."""
68
+ return asyncio.run(chat_with_replit(message, history))
69
+
70
+ # -------------------------------------------------
71
+ # Chat Functionality for ArXiv Paper (Document Chat)
72
+ # -------------------------------------------------
73
+ async def chat_with_replit_arxiv(message, history, doi_num):
74
+ """Chat answering questions using an ArXiv paper as context."""
75
  try:
76
+ # Load the ArXiv document and split it into chunks
77
  loader = ArxivLoader(query=str(doi_num), load_max_docs=10)
78
  documents = loader.load_and_split()
79
+ if not documents:
80
+ return "No documents found for the provided arXiv number."
81
  metadata = documents[0].metadata
82
 
83
+ # Create vector store for the loaded documents
84
  vector_store = Chroma.from_documents(documents, embedding_model)
85
 
86
  def retrieve_relevant_content(user_query):
 
107
  return response.choices[0].message.content
108
 
109
  except Exception as e:
110
+ logger.error(f"Error in chat with arXiv PDF: {e}")
111
+ return "Error processing chat with arXiv paper."
112
+
113
+ def chat_with_replit_arxiv_sync(message, history, doi_num):
114
+ """Synchronous wrapper for arXiv chat."""
115
+ return asyncio.run(chat_with_replit_arxiv(message, history, doi_num))
116
+
117
+ # -------------------------------------------------
118
+ # Chat Functionality for Local PDF (Document Chat)
119
+ # -------------------------------------------------
120
+ async def chat_with_replit_local_pdf(message, history, pdf_file_path):
121
+ """Chat answering questions using a local PDF as context."""
122
+ try:
123
+ # Extract text from the uploaded PDF file
124
+ reader = PdfReader(pdf_file_path)
125
+ text = "\n".join(page.extract_text() or "" for page in reader.pages)
126
+ if not text.strip():
127
+ return "Could not extract text from PDF."
128
+
129
+ # Create a document from the PDF text
130
+ documents = [Document(page_content=text, metadata={"source": pdf_file_path})]
131
 
132
+ # Create a vector store using the document
133
+ vector_store = Chroma.from_documents(documents, embedding_model)
134
+
135
+ def retrieve_relevant_content(user_query):
136
+ results = vector_store.similarity_search(user_query, k=3)
137
+ return "\n\n".join(doc.page_content for doc in results)
138
 
139
+ relevant_content = retrieve_relevant_content(message)
140
+
141
+ messages = [
142
+ {"role": "user", "content": message},
143
+ {"role": "system", "content": f"Answer based on this PDF document: {pdf_file_path}.\n"
144
+ f"Relevant Content: {relevant_content}"}
145
+ ]
146
+
147
+ response = await client.chat.completions.create(
148
+ messages=messages,
149
+ model="llama3-70b-8192",
150
+ temperature=0,
151
+ max_tokens=1024,
152
+ top_p=1,
153
+ stream=False,
154
+ )
155
+ return response.choices[0].message.content
156
+
157
+ except Exception as e:
158
+ logger.error(f"Error in chat with local PDF: {e}")
159
+ return "Error processing chat with local PDF."
160
+
161
+ def chat_with_replit_local_pdf_sync(message, history, pdf_file):
162
+ """Synchronous wrapper for local PDF chat."""
163
+ return asyncio.run(chat_with_replit_local_pdf(message, history, pdf_file))
164
+
165
+ # ------------------------------------
166
+ # Gradio UI Integration
167
+ # ------------------------------------
168
  with gr.Blocks() as app:
169
+ # --- Tab: General Chat ---
170
+ with gr.Tab(label="General Chat"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  gr.Markdown("### Chat with the Assistant")
172
  with gr.Row():
173
+ general_chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
174
+ general_send_button = gr.Button("Send")
175
+ general_chat_output = gr.Markdown(label="Chat Output", height=300)
176
+ general_chat_history = gr.State([])
177
+
178
+ def update_general_chat(user_message, history):
179
+ # Append the new message with an empty assistant reply for now.
 
 
 
180
  history = history or []
181
  history.append([user_message, ""])
182
  return history, history
183
 
184
+ def update_general_response(history):
 
185
  user_message = history[-1][0]
186
  response = chat_with_replit_sync(user_message, history[:-1])
 
187
  history[-1][1] = response
 
188
  formatted = "\n\n".join([f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history])
189
  return history, formatted
190
 
191
+ general_send_button.click(update_general_chat, inputs=[general_chat_input, general_chat_history],
192
+ outputs=[general_chat_history, general_chat_output])
193
+ general_send_button.click(update_general_response, inputs=general_chat_history,
194
+ outputs=[general_chat_history, general_chat_output])
195
+
196
+ # --- Tab: Chat with ArXiv Paper ---
197
+ with gr.Tab(label="Chat with ArXiv Paper"):
198
+ gr.Markdown("### Ask Questions About an ArXiv Paper")
199
+ with gr.Row():
200
+ arxiv_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
201
+ arxiv_doi = gr.Textbox(placeholder="Enter arXiv number, e.g. 2502.02523", label="ArXiv Number")
202
+ arxiv_send_button = gr.Button("Send")
203
+ arxiv_chat_output = gr.Markdown(label="Chat Output", height=300)
204
+ arxiv_chat_history = gr.State([])
205
+
206
+ def update_arxiv_chat(user_message, history):
207
+ history = history or []
208
+ history.append([user_message, ""])
209
+ return history, history
210
+
211
+ def update_arxiv_response(history, doi_num):
212
+ user_message = history[-1][0]
213
+ response = chat_with_replit_arxiv_sync(user_message, history[:-1], doi_num)
214
+ history[-1][1] = response
215
+ formatted = "\n\n".join([f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history])
216
+ return history, formatted
217
+
218
+ arxiv_send_button.click(update_arxiv_chat, inputs=[arxiv_input, arxiv_chat_history],
219
+ outputs=[arxiv_chat_history, arxiv_chat_output])
220
+ arxiv_send_button.click(update_arxiv_response, inputs=[arxiv_chat_history, arxiv_doi],
221
+ outputs=[arxiv_chat_history, arxiv_chat_output])
222
+
223
+ # --- Tab: Chat with Local PDF ---
224
+ with gr.Tab(label="Chat with Local PDF"):
225
+ gr.Markdown("### Ask Questions About an Uploaded PDF")
226
+ with gr.Row():
227
+ pdf_file_input = gr.File(label="Upload PDF file")
228
+ pdf_chat_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
229
+ pdf_send_button = gr.Button("Send")
230
+ pdf_chat_output = gr.Markdown(label="Chat Output", height=300)
231
+ pdf_chat_history = gr.State([])
232
+
233
+ def update_pdf_chat(user_message, history):
234
+ history = history or []
235
+ history.append([user_message, ""])
236
+ return history, history
237
+
238
+ def update_pdf_response(history, pdf_file):
239
+ user_message = history[-1][0]
240
+ response = chat_with_replit_local_pdf_sync(user_message, history[:-1], pdf_file)
241
+ history[-1][1] = response
242
+ formatted = "\n\n".join([f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history])
243
+ return history, formatted
244
+
245
+ pdf_send_button.click(update_pdf_chat, inputs=[pdf_chat_input, pdf_chat_history],
246
+ outputs=[pdf_chat_history, pdf_chat_output])
247
+ pdf_send_button.click(update_pdf_response, inputs=[pdf_chat_history, pdf_file_input],
248
+ outputs=[pdf_chat_history, pdf_chat_output])
249
 
250
  app.launch()
251
+