tdurzynski committed on
Commit
42866ce
·
verified ·
1 Parent(s): 9ca2091

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -103
app.py CHANGED
@@ -4,23 +4,21 @@ import gradio as gr
4
  import asyncio
5
  from dotenv import load_dotenv
6
  from langchain.document_loaders import ArxivLoader
7
- from langchain.text_splitter import TokenTextSplitter
8
  from langchain.vectorstores import Chroma
9
  from langchain_community.embeddings import HuggingFaceHubEmbeddings
10
  from langchain_groq import ChatGroq
11
  from PyPDF2 import PdfReader
12
  from huggingface_hub import login
13
- from groq import AsyncGroq, Groq
14
- from langchain.docstore.document import Document # For creating a document from PDF text
15
 
16
  # Load environment variables
17
  load_dotenv()
18
  HUGGING_API_KEY = os.getenv("HUGGING_API_KEY")
19
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
20
 
21
- # Ensure API keys are set
22
  if not HUGGING_API_KEY or not GROQ_API_KEY:
23
- raise ValueError("API keys for HuggingFace or Groq are missing. Set them in your environment variables.")
24
 
25
  # Configure Logging
26
  logging.basicConfig(level=logging.INFO)
@@ -34,149 +32,106 @@ embedding_model = HuggingFaceHubEmbeddings(huggingfacehub_api_token=HUGGING_API_
34
  llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)
35
  client = AsyncGroq(api_key=GROQ_API_KEY)
36
 
37
- # -----------------------------
38
- # Chat Functionality (General)
39
- # -----------------------------
 
 
40
  async def chat_with_replit(message, history):
41
- """General chat functionality using the Groq API."""
42
  try:
43
  messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
44
-
45
  for chat in history or []:
46
  user_msg, assistant_msg = chat
47
  messages.append({"role": "user", "content": user_msg})
48
  messages.append({"role": "assistant", "content": assistant_msg})
49
-
50
  messages.append({"role": "user", "content": message})
51
-
52
  response = await client.chat.completions.create(
53
- messages=messages,
54
- model="llama3-70b-8192",
55
- temperature=0,
56
- max_tokens=1024,
57
- top_p=1,
58
- stream=False, # For simplicity we are not streaming
59
  )
60
  return response.choices[0].message.content
61
-
62
  except Exception as e:
63
  logger.error(f"Chat error: {e}")
64
  return "Error in chat response."
65
 
66
  def chat_with_replit_sync(message, history):
67
- """Synchronous wrapper for general chat."""
68
  return asyncio.run(chat_with_replit(message, history))
69
 
70
- # -------------------------------------------------
71
- # Chat Functionality for ArXiv Paper (Document Chat)
72
- # -------------------------------------------------
73
  async def chat_with_replit_arxiv(message, history, doi_num):
74
- """Chat answering questions using an ArXiv paper as context."""
75
  try:
76
- # Load the ArXiv document and split it into chunks
77
  loader = ArxivLoader(query=str(doi_num), load_max_docs=10)
78
  documents = loader.load_and_split()
79
  if not documents:
80
  return "No documents found for the provided arXiv number."
81
  metadata = documents[0].metadata
82
-
83
- # Create vector store for the loaded documents
84
  vector_store = Chroma.from_documents(documents, embedding_model)
85
-
86
- def retrieve_relevant_content(user_query):
87
- results = vector_store.similarity_search(user_query, k=3)
88
- return "\n\n".join(doc.page_content for doc in results)
89
-
90
- relevant_content = retrieve_relevant_content(message)
91
-
92
  messages = [
93
  {"role": "user", "content": message},
94
- {"role": "system", "content": f"Answer based on this arXiv paper {doi_num}.\n"
95
- f"Metadata: {metadata}.\n"
96
- f"Relevant Content: {relevant_content}"}
97
  ]
98
-
99
  response = await client.chat.completions.create(
100
- messages=messages,
101
- model="llama3-70b-8192",
102
- temperature=0,
103
- max_tokens=1024,
104
- top_p=1,
105
- stream=False,
106
  )
107
  return response.choices[0].message.content
108
-
109
  except Exception as e:
110
- logger.error(f"Error in chat with arXiv PDF: {e}")
111
  return "Error processing chat with arXiv paper."
112
 
113
  def chat_with_replit_arxiv_sync(message, history, doi_num):
114
- """Synchronous wrapper for arXiv chat."""
115
  return asyncio.run(chat_with_replit_arxiv(message, history, doi_num))
116
 
117
- # -------------------------------------------------
118
- # Chat Functionality for Local PDF (Document Chat)
119
- # -------------------------------------------------
120
- async def chat_with_replit_local_pdf(message, history, pdf_file_path):
121
- """Chat answering questions using a local PDF as context."""
122
  try:
123
- # Extract text from the uploaded PDF file
124
- reader = PdfReader(pdf_file_path)
125
- text = "\n".join(page.extract_text() or "" for page in reader.pages)
126
- if not text.strip():
127
- return "Could not extract text from PDF."
128
-
129
- # Create a document from the PDF text
130
- documents = [Document(page_content=text, metadata={"source": pdf_file_path})]
131
-
132
- # Create a vector store using the document
133
- vector_store = Chroma.from_documents(documents, embedding_model)
134
-
135
- def retrieve_relevant_content(user_query):
136
- results = vector_store.similarity_search(user_query, k=3)
137
- return "\n\n".join(doc.page_content for doc in results)
138
-
139
- relevant_content = retrieve_relevant_content(message)
140
-
141
  messages = [
142
  {"role": "user", "content": message},
143
- {"role": "system", "content": f"Answer based on this PDF document: {pdf_file_path}.\n"
144
- f"Relevant Content: {relevant_content}"}
145
  ]
146
-
147
  response = await client.chat.completions.create(
148
- messages=messages,
149
- model="llama3-70b-8192",
150
- temperature=0,
151
- max_tokens=1024,
152
- top_p=1,
153
- stream=False,
154
  )
155
  return response.choices[0].message.content
156
-
157
  except Exception as e:
158
  logger.error(f"Error in chat with local PDF: {e}")
159
  return "Error processing chat with local PDF."
160
 
161
- def chat_with_replit_local_pdf_sync(message, history, pdf_file):
162
- """Synchronous wrapper for local PDF chat."""
163
- return asyncio.run(chat_with_replit_local_pdf(message, history, pdf_file))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
- # ------------------------------------
166
- # Gradio UI Integration
167
- # ------------------------------------
168
  with gr.Blocks() as app:
169
- # --- Tab: General Chat ---
170
  with gr.Tab(label="General Chat"):
171
  gr.Markdown("### Chat with the Assistant")
172
  with gr.Row():
173
  general_chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
174
  general_send_button = gr.Button("Send")
175
- general_chat_output = gr.Markdown(label="Chat Output", height=300)
176
  general_chat_history = gr.State([])
177
 
178
  def update_general_chat(user_message, history):
179
- # Append the new message with an empty assistant reply for now.
180
  history = history or []
181
  history.append([user_message, ""])
182
  return history, history
@@ -189,18 +144,18 @@ with gr.Blocks() as app:
189
  return history, formatted
190
 
191
  general_send_button.click(update_general_chat, inputs=[general_chat_input, general_chat_history],
192
- outputs=[general_chat_history, general_chat_output])
193
  general_send_button.click(update_general_response, inputs=general_chat_history,
194
- outputs=[general_chat_history, general_chat_output])
195
 
196
- # --- Tab: Chat with ArXiv Paper ---
197
  with gr.Tab(label="Chat with ArXiv Paper"):
198
  gr.Markdown("### Ask Questions About an ArXiv Paper")
199
  with gr.Row():
200
  arxiv_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
201
  arxiv_doi = gr.Textbox(placeholder="Enter arXiv number, e.g. 2502.02523", label="ArXiv Number")
202
  arxiv_send_button = gr.Button("Send")
203
- arxiv_chat_output = gr.Markdown(label="Chat Output", height=300)
204
  arxiv_chat_history = gr.State([])
205
 
206
  def update_arxiv_chat(user_message, history):
@@ -216,18 +171,19 @@ with gr.Blocks() as app:
216
  return history, formatted
217
 
218
  arxiv_send_button.click(update_arxiv_chat, inputs=[arxiv_input, arxiv_chat_history],
219
- outputs=[arxiv_chat_history, arxiv_chat_output])
220
  arxiv_send_button.click(update_arxiv_response, inputs=[arxiv_chat_history, arxiv_doi],
221
- outputs=[arxiv_chat_history, arxiv_chat_output])
222
 
223
- # --- Tab: Chat with Local PDF ---
224
  with gr.Tab(label="Chat with Local PDF"):
225
  gr.Markdown("### Ask Questions About an Uploaded PDF")
 
 
226
  with gr.Row():
227
- pdf_file_input = gr.File(label="Upload PDF file")
228
  pdf_chat_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
229
  pdf_send_button = gr.Button("Send")
230
- pdf_chat_output = gr.Markdown(label="Chat Output", height=300)
231
  pdf_chat_history = gr.State([])
232
 
233
  def update_pdf_chat(user_message, history):
@@ -235,17 +191,18 @@ with gr.Blocks() as app:
235
  history.append([user_message, ""])
236
  return history, history
237
 
238
- def update_pdf_response(history, pdf_file):
239
  user_message = history[-1][0]
240
- response = chat_with_replit_local_pdf_sync(user_message, history[:-1], pdf_file)
241
  history[-1][1] = response
242
  formatted = "\n\n".join([f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history])
243
  return history, formatted
244
 
 
245
  pdf_send_button.click(update_pdf_chat, inputs=[pdf_chat_input, pdf_chat_history],
246
- outputs=[pdf_chat_history, pdf_chat_output])
247
- pdf_send_button.click(update_pdf_response, inputs=[pdf_chat_history, pdf_file_input],
248
- outputs=[pdf_chat_history, pdf_chat_output])
249
 
250
  app.launch()
251
 
 
4
  import asyncio
5
  from dotenv import load_dotenv
6
  from langchain.document_loaders import ArxivLoader
 
7
  from langchain.vectorstores import Chroma
8
  from langchain_community.embeddings import HuggingFaceHubEmbeddings
9
  from langchain_groq import ChatGroq
10
  from PyPDF2 import PdfReader
11
  from huggingface_hub import login
12
+ from groq import AsyncGroq
13
+ from langchain.docstore.document import Document
14
 
15
# Load environment variables from a local .env file into os.environ.
load_dotenv()
HUGGING_API_KEY = os.getenv("HUGGING_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Fail fast at import time if either credential is absent.
if not (HUGGING_API_KEY and GROQ_API_KEY):
    raise ValueError("API keys for HuggingFace or Groq are missing.")

# Configure Logging
logging.basicConfig(level=logging.INFO)
 
32
# Groq-backed chat model wired into the LangChain pipeline.
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)
# Raw async Groq client used for direct chat-completion calls below.
client = AsyncGroq(api_key=GROQ_API_KEY)

# Module-level cache for the "Chat with Local PDF" tab:
# pdf_vector_store — Chroma store built from the last uploaded PDF (None until one is processed)
# current_pdf_path — the file the store was built from, used to skip re-embedding
pdf_vector_store = None
current_pdf_path = None
38
+
39
+ # General Chat (unchanged)
40
async def chat_with_replit(message, history):
    """Answer a general user question via the Groq chat-completions API.

    Replays ``history`` (a list of ``[user, assistant]`` pairs) into the
    message list so the model sees prior turns, then appends *message*.
    Returns the model's reply, or a fixed error string on any failure.
    """
    try:
        convo = [{"role": "system", "content": "You are an assistant answering user questions."}]
        for past_user, past_assistant in history or []:
            convo.extend([
                {"role": "user", "content": past_user},
                {"role": "assistant", "content": past_assistant},
            ])
        convo.append({"role": "user", "content": message})
        completion = await client.chat.completions.create(
            messages=convo,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return completion.choices[0].message.content
    except Exception as err:
        logger.error(f"Chat error: {err}")
        return "Error in chat response."
55
 
56
def chat_with_replit_sync(message, history):
    """Blocking wrapper so synchronous Gradio callbacks can use the async chat."""
    result = asyncio.run(chat_with_replit(message, history))
    return result
58
 
59
+ # ArXiv Chat (unchanged)
 
 
60
async def chat_with_replit_arxiv(message, history, doi_num):
    """Answer *message* using the arXiv paper ``doi_num`` as context.

    Loads and chunks the paper, embeds the chunks into a throwaway Chroma
    store, retrieves the 3 chunks most similar to the question, and asks the
    Groq model to answer with that material (plus the paper metadata) in the
    system prompt.  ``history`` is accepted for interface parity but unused.
    Returns the model reply, or an error string on failure.
    """
    try:
        docs = ArxivLoader(query=str(doi_num), load_max_docs=10).load_and_split()
        if not docs:
            return "No documents found for the provided arXiv number."
        metadata = docs[0].metadata
        store = Chroma.from_documents(docs, embedding_model)
        hits = store.similarity_search(message, k=3)
        relevant_content = "\n\n".join(hit.page_content for hit in hits)
        prompt = [
            {"role": "user", "content": message},
            {"role": "system", "content": f"Answer based on this arXiv paper {doi_num}.\nMetadata: {metadata}.\nRelevant Content: {relevant_content}"},
        ]
        completion = await client.chat.completions.create(
            messages=prompt,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return completion.choices[0].message.content
    except Exception as err:
        logger.error(f"Error in chat with ArXiv PDF: {err}")
        return "Error processing chat with arXiv paper."
81
 
82
def chat_with_replit_arxiv_sync(message, history, doi_num):
    """Blocking wrapper around the async arXiv chat for Gradio callbacks."""
    result = asyncio.run(chat_with_replit_arxiv(message, history, doi_num))
    return result
84
 
85
+ # Local PDF Chat
86
async def chat_with_replit_local_pdf(message, vector_store):
    """Answer *message* against a previously built PDF vector store.

    ``vector_store`` is the Chroma index produced by ``process_pdf``; if it is
    falsy (no PDF processed yet) the user is told to upload one first.
    Retrieves the 3 most similar chunks and feeds them to the Groq model via
    the system prompt.  Returns the reply, or an error string on failure.
    """
    try:
        if not vector_store:
            return "Please upload a PDF first and wait for processing to complete."
        hits = vector_store.similarity_search(message, k=3)
        relevant_content = "\n\n".join(hit.page_content for hit in hits)
        prompt = [
            {"role": "user", "content": message},
            {"role": "system", "content": f"Answer based on the uploaded PDF.\nRelevant Content: {relevant_content}"},
        ]
        completion = await client.chat.completions.create(
            messages=prompt,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return completion.choices[0].message.content
    except Exception as err:
        logger.error(f"Error in chat with local PDF: {err}")
        return "Error processing chat with local PDF."
103
 
104
def process_pdf(pdf_file):
    """Extract text from an uploaded PDF and (re)build the global vector store.

    Triggered by the Gradio file-input ``change`` event.  Caches by file path
    so re-selecting the same file skips re-embedding.  Returns a status
    string shown in the UI.
    """
    global pdf_vector_store, current_pdf_path
    try:
        # The change event also fires when the selection is cleared; reset the
        # cached state instead of handing None to PdfReader (which raises and
        # previously surfaced a confusing "Error processing PDF" message).
        if pdf_file is None:
            pdf_vector_store = None
            current_pdf_path = None
            return "Please upload a PDF file."
        if pdf_file == current_pdf_path:
            return "PDF already processed. Ask away!"
        logger.info("Extracting text from PDF...")
        reader = PdfReader(pdf_file)
        # extract_text() can return None for image-only pages; substitute "".
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
        if not text.strip():
            return "Could not extract text from PDF."
        documents = [Document(page_content=text, metadata={"source": pdf_file})]
        logger.info("Creating vector store...")
        pdf_vector_store = Chroma.from_documents(documents, embedding_model)
        current_pdf_path = pdf_file
        return "PDF processed successfully. You can now ask questions."
    except Exception as e:
        logger.error(f"Error processing PDF: {e}")
        return f"Error processing PDF: {str(e)}"
122
 
123
+ # Gradio UI
 
 
124
  with gr.Blocks() as app:
125
+ # General Chat (unchanged)
126
  with gr.Tab(label="General Chat"):
127
  gr.Markdown("### Chat with the Assistant")
128
  with gr.Row():
129
  general_chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
130
  general_send_button = gr.Button("Send")
131
+ general_chat_output = gr.Markdown(label="Chat Output")
132
  general_chat_history = gr.State([])
133
 
134
def update_general_chat(user_message, history):
    """Append the new user message (with an empty assistant slot) to history.

    Mutates and returns ``history`` twice: once for the gr.State, once for
    the display component.
    """
    if not history:
        history = []
    history.append([user_message, ""])
    return history, history
 
144
  return history, formatted
145
 
146
  general_send_button.click(update_general_chat, inputs=[general_chat_input, general_chat_history],
147
+ outputs=[general_chat_history, general_chat_output])
148
  general_send_button.click(update_general_response, inputs=general_chat_history,
149
+ outputs=[general_chat_history, general_chat_output])
150
 
151
+ # ArXiv Chat (unchanged)
152
  with gr.Tab(label="Chat with ArXiv Paper"):
153
  gr.Markdown("### Ask Questions About an ArXiv Paper")
154
  with gr.Row():
155
  arxiv_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
156
  arxiv_doi = gr.Textbox(placeholder="Enter arXiv number, e.g. 2502.02523", label="ArXiv Number")
157
  arxiv_send_button = gr.Button("Send")
158
+ arxiv_chat_output = gr.Markdown(label="Chat Output")
159
  arxiv_chat_history = gr.State([])
160
 
161
  def update_arxiv_chat(user_message, history):
 
171
  return history, formatted
172
 
173
  arxiv_send_button.click(update_arxiv_chat, inputs=[arxiv_input, arxiv_chat_history],
174
+ outputs=[arxiv_chat_history, arxiv_chat_output])
175
  arxiv_send_button.click(update_arxiv_response, inputs=[arxiv_chat_history, arxiv_doi],
176
+ outputs=[arxiv_chat_history, arxiv_chat_output])
177
 
178
+ # Local PDF Chat
179
  with gr.Tab(label="Chat with Local PDF"):
180
  gr.Markdown("### Ask Questions About an Uploaded PDF")
181
+ pdf_file_input = gr.File(label="Upload PDF file", file_types=[".pdf"])
182
+ pdf_status = gr.Textbox(label="PDF Processing Status", interactive=False)
183
  with gr.Row():
 
184
  pdf_chat_input = gr.Textbox(placeholder="Enter your question here...", label="Your Question")
185
  pdf_send_button = gr.Button("Send")
186
+ pdf_chat_output = gr.Markdown(label="Chat Output")
187
  pdf_chat_history = gr.State([])
188
 
189
  def update_pdf_chat(user_message, history):
 
191
  history.append([user_message, ""])
192
  return history, history
193
 
194
+ def update_pdf_response(history):
195
  user_message = history[-1][0]
196
+ response = asyncio.run(chat_with_replit_local_pdf(user_message, pdf_vector_store))
197
  history[-1][1] = response
198
  formatted = "\n\n".join([f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history])
199
  return history, formatted
200
 
201
+ pdf_file_input.change(process_pdf, inputs=pdf_file_input, outputs=pdf_status)
202
  pdf_send_button.click(update_pdf_chat, inputs=[pdf_chat_input, pdf_chat_history],
203
+ outputs=[pdf_chat_history, pdf_chat_output])
204
+ pdf_send_button.click(update_pdf_response, inputs=pdf_chat_history,
205
+ outputs=[pdf_chat_history, pdf_chat_output])
206
 
207
  app.launch()
208